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SELF-DESCRIBING, INFRASTRUCTURE-INDEPENDENT 
REPRESENTATION OF LOGICAL STRUCTURE OF 
COLLECTION 



SELF-DESCRIBING, INFRASTRUCTURE-INDEPENDENT 
REPRESENTATION OF DATA OBJECTS 



104 



SELF-DESCRIBING, INFRASTRUCTURE-INDEPENDENT 
REPRESENTATION OF PRESENTATION MECHANISM 

(OPTIONAL) 
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DATABASE RECORDS: 



Customer Id: cl500 

First: Joe 

Last: Smith 

City: San Diego 

Email: joesmith@company.com 

Phone: (555) 555-5555 

Fax: (555) 551-5555 

Customer Id: cl600 

First: John 

Last: Smith 

City: San Diego 

Email: johnsmith@company.com 

Phone: (555) 552-5555 

Fax: (555) 553-5555 



FIGURE 2 



PERSISTENT ARCHIVES 
inventors: Reagan W. Moore, et al. 
Howrey Docket No. 02737.0004 .NPUS01 
3/118 



CUSTOMER.DTD 



300 
302 

304 {_ 
306 -< 



<!-- CUSTOMER.DTD: logical structure of one customer record. -->
<!-- Occurrence indicators: ? = zero or one; * = zero or more; + = one or more -->
<!ELEMENT CUSTOMER (customer_name+, email*, phone*, fax?)>
<!-- customer_id is declared with type ID, so each value must be unique within a document. -->
<!ATTLIST CUSTOMER customer_id ID #REQUIRED>
<!-- Every attribute declaration needs a type; CDATA accepts free text such as "san diego". -->
<!ATTLIST CUSTOMER city CDATA #REQUIRED>
<!ELEMENT customer_name (first+, last+)>
<!ELEMENT first (#PCDATA)>
<!ELEMENT last (#PCDATA)>
<!ELEMENT email (#PCDATA)>
<!ELEMENT phone (#PCDATA)>
<!ELEMENT fax (#PCDATA)>



FIGURE 3A 



XML DOCUMENT: 

r <?xml version="1.0" standalone="no"?> 
308 \ <!DOCTYPE CUSTOMER SYSTEM "customer.dtd"> 
310 {_ <CUSTOMER customer_id="cl500" city="san diego"> 
312 <customer_name> 
<first>Joe</first> 
<last>Smith</last> 
312 ^ </customer_name> 

f <email>joesmith@company.com</email> 
316 -l <phone>(555) 555-5555</phone> 

I <fax>(555) 555-5555</fax> 

310 1 </CUSTOMER> 



314 
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<?xml version="1.0" standalone="yes"?>
<!-- Self-describing variant of the customer record: the DTD is carried in the
     internal subset, so the document declares standalone="yes". -->
<!DOCTYPE CUSTOMER [
  <!-- ? = zero or one; * = zero or more; + = one or more -->
  <!ELEMENT CUSTOMER (customer_name+, email*, phone*, fax?)>
  <!ATTLIST CUSTOMER customer_id ID #REQUIRED>
  <!-- An attribute declaration requires a type; CDATA is free text. -->
  <!ATTLIST CUSTOMER city CDATA #REQUIRED>
  <!-- Must be spelled customer_name to match the CUSTOMER content model and the instance below. -->
  <!ELEMENT customer_name (first+, last+)>
  <!ELEMENT first (#PCDATA)>
  <!ELEMENT last (#PCDATA)>
  <!ELEMENT email (#PCDATA)>
  <!ELEMENT phone (#PCDATA)>
  <!ELEMENT fax (#PCDATA)>
]>
<CUSTOMER customer_id="cl500" city="san diego">
  <customer_name>
    <first>Joe</first>
    <last>Smith</last>
  </customer_name>
  <email>joesmith@company.com</email>
  <phone>(555) 555-5555</phone>
  <fax>(555) 555-5555</fax>
</CUSTOMER>
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XML DOCUMENT (INPUT DATA) : 

<!-- Input data for the style-sheet example: one <division> per sales region.
     revenue, growth and bonus are plain numbers; growth may be negative. -->
<sales>
  <division id="North">
    <revenue>10</revenue>
    <growth>9</growth>
    <bonus>7</bonus>
  </division>
  <division id="South">
    <revenue>4</revenue>
    <growth>3</growth>
    <bonus>4</bonus>
  </division>
  <division id="West">
    <revenue>6</revenue>
    <growth>-1.5</growth>
    <bonus>2</bonus>
  </division>
</sales>
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XSL STYLE SHEET : 

<!-- Simplified XSLT stylesheet (literal result element as stylesheet): renders the
     <sales> input as an HTML table, one row per division, sorted by revenue. -->
<html xsl:version="1.0"
      xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
      lang="en">
  <head>
    <title>Sales Results By Division</title>
  </head>
  <body>
    <table border="1">
      <tr>
        <th>Division</th>
        <th>Revenue</th>
        <th>Growth</th>
        <th>Bonus</th>
      </tr>
      <xsl:for-each select="sales/division">
        <!-- order the result by revenue -->
        <xsl:sort select="revenue"
                  data-type="number"
                  order="descending"/>
        <tr>
          <td>
            <em><xsl:value-of select="@id"/></em>
          </td>
          <td>
            <xsl:value-of select="revenue"/>
          </td>
          <td>
            <!-- highlight negative growth in red; "<" must be written as &lt; inside an attribute value -->
            <xsl:if test="growth &lt; 0">
              <xsl:attribute name="style">
                <xsl:text>color:red</xsl:text>
              </xsl:attribute>
            </xsl:if>
            <xsl:value-of select="growth"/>
          </td>
          <td>
            <xsl:value-of select="bonus"/>
          </td>
        </tr>
      </xsl:for-each>
    </table>
  </body>
</html>
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HTML OUTPUT : 

<!-- Result of applying the style sheet to the sales input: rows appear in
     descending revenue order and the negative growth cell carries style="color:red". -->
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<title>Sales Results By Division</title>
</head>
<body>
<table border="1">
<tr>
<th>Division</th><th>Revenue</th><th>Growth</th><th>Bonus</th>
</tr>
<tr>
<td><em>North</em></td><td>10</td><td>9</td><td>7</td>
</tr>
<tr>
<td><em>West</em></td><td>6</td><td style="color:red">-1.5</td><td>2</td>
</tr>
<tr>
<td><em>South</em></td><td>4</td><td>3</td><td>4</td>
</tr>
</table>
</body>
</html>
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{PRIVATE} NARA_article_begin : 

Path: news.sdsc.edu!newshub.csu.net!newshub.sdsu.edu!newsfeed.berkeley.edu! 

news.cis.ohiostate.edu!news.rootsweb.com!rootsweb-gw 
From: Casivers@aol.com 
Newsgroups: soc.genealogy.hispanic 
Subject: Passenger Lists for Ships from Spain To Cuba 
Date: 22 Mar 1999 16:20:37 -0800 
Organization: Roots Web Genealogical Data Cooperative 
Lines: 7 

Message-ID: <2376321.36f6de03@aol.com> 
NNTP-Posting-Host: localhost 
Mime-Version: 1.0 

Content-Type: text/plain; charset=US-ASCII 
Content-Transfer-Encoding: 7bit 

X-Trace: bl-1.rootsweb.com 922148437 3147 127.0.0.1 (23 Mar 1999 00:20:37 GMT) 
X-Complaints-To: usenet@news.rootsweb.com 
NNTP-Posting-Date: 23 Mar 1999 00:20:37 GMT 
Xref: news.sdsc.edu soc.genealogy.hispanic:3156 

Does anyone know where I can get passengers lists for 
ships that transported Spaniards to Cuba circa 1860's? 
Any help would be appreciated. 
Thanks, 

Cheryl Sanchez-Sivers 

NARA_article_end : 
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<!-- DTD for one Usenet message: a headers section followed by the body text.
     Headers are split into required, optional and other groups. -->
<!ELEMENT rfc1036_mesg (headers, body)>
<!ELEMENT headers (required_headers, optional_headers, other_headers)>
<!ELEMENT body (#PCDATA)>

<!ELEMENT required_headers (From, Date, Newsgroups, Subject, Message-ID, Path)>
<!ELEMENT optional_headers (Followup-To?, Expires?, Reply-To?, Sender?, References?, Control?, Distribution?,
                            Keywords?, Summary?, Approved?, Lines?, Xref?, Organization?)>
<!ELEMENT other_headers (other*)>

<!-- Required headers. Each carries a mandatory seqno attribute. -->
<!ELEMENT From       (#PCDATA)>
<!ELEMENT Date       (#PCDATA)>
<!ELEMENT Newsgroups (#PCDATA)>
<!ELEMENT Subject    (#PCDATA)>
<!ELEMENT Message-ID (#PCDATA)>
<!ELEMENT Path       (#PCDATA)>

<!ATTLIST From       seqno CDATA #REQUIRED>
<!ATTLIST Date       seqno CDATA #REQUIRED>
<!ATTLIST Newsgroups seqno CDATA #REQUIRED>
<!ATTLIST Subject    seqno CDATA #REQUIRED>
<!ATTLIST Message-ID seqno CDATA #REQUIRED>
<!ATTLIST Path       seqno CDATA #REQUIRED>

<!-- Optional headers, declared the same way as the required ones. -->
<!ELEMENT Followup-To  (#PCDATA)>
<!ELEMENT Expires      (#PCDATA)>
<!ELEMENT Reply-To     (#PCDATA)>
<!ELEMENT Sender       (#PCDATA)>
<!ELEMENT References   (#PCDATA)>
<!ELEMENT Control      (#PCDATA)>
<!ELEMENT Distribution (#PCDATA)>
<!ELEMENT Keywords     (#PCDATA)>
<!ELEMENT Summary      (#PCDATA)>
<!ELEMENT Approved     (#PCDATA)>
<!ELEMENT Lines        (#PCDATA)>
<!ELEMENT Xref         (#PCDATA)>
<!ELEMENT Organization (#PCDATA)>

<!ATTLIST Followup-To  seqno CDATA #REQUIRED>
<!ATTLIST Expires      seqno CDATA #REQUIRED>
<!ATTLIST Reply-To     seqno CDATA #REQUIRED>
<!ATTLIST Sender       seqno CDATA #REQUIRED>
<!ATTLIST References   seqno CDATA #REQUIRED>
<!ATTLIST Control      seqno CDATA #REQUIRED>
<!ATTLIST Distribution seqno CDATA #REQUIRED>
<!ATTLIST Keywords     seqno CDATA #REQUIRED>
<!ATTLIST Summary      seqno CDATA #REQUIRED>
<!ATTLIST Approved     seqno CDATA #REQUIRED>
<!ATTLIST Lines        seqno CDATA #REQUIRED>
<!ATTLIST Xref         seqno CDATA #REQUIRED>
<!ATTLIST Organization seqno CDATA #REQUIRED>

<!-- Any header that is neither required nor optional: the header name goes in
     the keyword attribute and the header value is the element text. -->
<!ELEMENT other (#PCDATA)>
<!ATTLIST other
    keyword CDATA #REQUIRED
    seqno   CDATA #REQUIRED>
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create table ngrps_headers_core (
    -- One row per message. internalMsgId is the key that ngrps_headers_othr
    -- and container_info refer back to.
    internalMsgId        integer        not null,
    -- Required headers (declared not null).
    FromInfo             varchar(200)   not null,
    MsgDate              varchar(50)    not null,
    Newsgroups           varchar(1900)  not null,
    SubjectInfo          varchar(1900)  not null,
    MessageId            varchar(200)   not null,
    PathInfo             varchar(1900)  not null,
    -- Optional headers (nullable).
    FollowupTo           varchar(1900),
    ExpiresOn            varchar(50),
    ReplyTo              varchar(200),
    SenderInfo           varchar(200),
    ReferencesInfo       varchar(1900),
    ControlInfo          varchar(1900),
    DistributionInfo     varchar(500),
    KeywordsInfo         varchar(1900),
    SummaryInfo          varchar(1900),
    ApprovedInfo         varchar(500),
    LinesOfEmail         integer,
    XrefInfo             varchar(500),
    OrganizationInfo     varchar(500),
    -- One sequence-number column per header column above.
    FromSeqNum           integer,
    MsgDateSeqNum        integer,
    NewsgroupsSeqNum     integer,
    SubjectSeqNum        integer,
    MessageIdSeqNum      integer,
    PathSeqNum           integer,
    FollowupToSeqNum     integer,
    ExpiresSeqNum        integer,
    ReplyToSeqNum        integer,
    SenderSeqNum         integer,
    ReferencesSeqNum     integer,
    ControlSeqNum        integer,
    DistributionSeqNum   integer,
    KeywordsSeqNum       integer,
    SummarySeqNum        integer,
    ApprovedSeqNum       integer,
    LinesSeqNum          integer,
    XrefSeqNum           integer,
    OrganizationSeqNum   integer,
    primary key (internalMsgId),
    unique (MessageId, MsgDate)
);

-- Headers outside the core set, stored as name/value rows keyed by message.
create table ngrps_headers_othr (
    internalMsgId   integer not null,
    HdrKeyName      varchar(50),
    HdrKeyValue     varchar(2000),
    HdrKeySeqNum    integer,
    foreign key (internalMsgId) references ngrps_headers_core (internalMsgId)
);



-- Links a message (internalId) to a data_id in MDAS_AD_REPL and records its
-- position in the container and its size.
create table container_info (
    internalId      integer not null,
    data_id         integer not null,
    posInContainer  integer,
    sizeOfMsg       integer,
    foreign key (internalId) references ngrps_headers_core (internalMsgId),
    foreign key (data_id) references MDAS_AD_REPL (data_id)
);
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FORMATTED MESSAGE USING XML DTD 



ffi output. xml -XML Notepad 








. '•/. Structure v 1 


Values^. J J= : . ' • \ :' : ' . ' . . ... * 


Q-^rg RFC1036.MESG 
Q O HEADERS 
! a-C3 REQUIRED_HEADERS 
\ a d FROM 

j + SEQNO 
! S"G3 DATE 
j E) C3 NEWSGROUPS 
! Sl-Ql SUBJECT 
i I5J-G3 MESSAGE-ID 




^/\^ ;r >; .--V ; t .. /• ' : . ; '.. . _ /. - ■.. ; 




Casivers@aol.com ! 


2 ! 


22 Mar 1 999 1 6:20:37 -0800 j 


soc. genealogy, hispanic 


Passenger Lists for Ships from Spain To Cuba 


< 2376321 . 36f 6de03@aol. com> I 




news. sdsc. edu! newshub. csu. net! ne wshub. sdsu. edu! newsf eed. berkeley. e. . . 1 


* ♦ SEQNO 
! & Ql 0 PT 1 0 NAL_H E AD E R S 
j EI-G3 LINES 

i a-ca xref 

i ih£3 ORGANIZATION 
'i 1 ♦ SEQNO 
1 El-Ca OTHER.HEADERS 

OTHER J 

1 ♦ KEYWORD 

1 ♦ SEQNO 

dB-GD OTHER j 
£}-•£□ OTHER 
a-C3 OTHER 
OTHER 
OTHER 
G3 £□ OTHER 
-X BODY * 


1 


^S^^^ ... ■ : 


7 


news. sdsc. edu soc. genealogy, hispanic: 31 56 j 


RootsWeb Genealogical Data Cooperative 


6 


? '""^ T •/^•^^ttfS?'"' ! ' ."'<■ "■ ''* . '--^ ' t V.". ":**"'." I I 


localhost 


NNTP-Posting-Host 


9 


1.0 


text/plain; charseMJS-ASCII 


7bit 


bl-1.rootsweb.com 922148437 3147 1 27.0.0.1 (23 Mar 1999 00:20:37 GMT) 


usenet@news. roots web. com 


23 Mar 1999 00:20:37 GMT 


iDoes anyone know where 1 can get passengers lists for ships that transpo... 
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WEB-BASED INTERFACE FOR 
ACCESSING THE E-MAIL COLLECTION. 



'3 MCAT-SRB A Data Integration System - Microsoft Internet Explorer 




J 0e £dft: Yiew Favorites Iools Help 








J Back ■' . Forward; Stop Refresh- Home 


Searcfe Favorites^ History. 


: ;!^il : ". Print B • 




J Address \kW\ http://srb.npaci.edu/ 


2} ^Go J Links" 



Create 
Vu-Sd 


New 
lema 


Add Al 
ToVu-S 


ttribute 
chema 


Drop 
Vu-Schema 


Drop Ailributs 
FrcmVo-Setenia 


Que 
Upd 


ry & 
ate 


vuSc 


hema 



Field 
Demos 



Field Demonstrations for News 
Groups 
Demo #1: Postings of Eric 

Lancaster 
Demo #2: Postings on 7th January 
1999 

Demo #3: Postings on the Classical 
Studies Subject with line 
count above 1000 but 
below 5000 

Demo #4: Postings with Content- 
Type header 

Demo #5: Non-required and non- 
optional headers 

Collection-level Metadata 



Object-level Metadata 

Collection Life- Cycle and Statistics 



Attribute Names 



mcatcore.data name 


1= 4 


L ,,,, 


mcatcore.data collection 
name 






1= d\ 


1 


newsgroups.posbiContainer 


1= 4 


1. 


news groups . size OfMs g 


1= J 


1 


news group s .Me s s age Id 


1= -J 


1 


news group s .Fromlnf 0 


1= d. 


1 


news group s .Ms gD ate 


| like 


|7 Jan 195 


news groups.News groups 




I 


news group s . Subj e ctlnf 0 


h d 


I 


news group s . S end erlnf 0 




I 


news group s .Keywordslnf 0 


L- d 


I .... 


newsgroups.LinesOfEmail 


1= M\ 


I 



Output Format: Tabular C. Linear <~ Tiled <~ Forml 
C Form l & 

Number of Items to Display:|1 0 _ Outer Join: C Yes 
<*No^ 



internet 



FIGURE 12 



PERSISTENT ARCHIVES ^ 
Inventors: Reagan W. Moore, etj 
Howrey Docket No. 02737.0004 .NF 
13/118 




PERSISTENT ARCHIVES 
Inventors: Reagan W. Moore, et al. 
Howrey Docket No. 02737.0004.NPUS01 
14/118 



<!-- caccf_collection: list of CACCF databases -->
<!ELEMENT caccf_collection (caccf_database)*>

<!-- caccf_database: a single database
       dateid        - creation date, identifies the database
       record_size   - size of each record (bytes)
       caccf_records - the actual data records -->
<!ELEMENT caccf_database (
    dateid,
    record_size,
    caccf_records
)>

<!-- caccf_records: list of caccf records -->
<!ELEMENT caccf_records (
    caccf_record*
)>



FIGURE 14 



PERSISTENT ARCHIVES 
Inventors: Reagan W. Moore, et al. 
Howrey Docket No. 02737.0004.NPUS01 N. 
15/118 



v 



<!-- caccf_record: all info is in the attributes.
     Each attribute is declared separately so its field length (in characters)
     and meaning can be given in the comment that follows it.
     NOTE(review): the names rn, mg, pg, pr and fl were illegible in the scanned
     figure and are reconstructed from the field meanings; confirm against the original. -->
<!ELEMENT caccf_record EMPTY>
<!ATTLIST caccf_record ms CDATA #REQUIRED> <!-- length  1: Military Service (DoD Component) -->
<!ATTLIST caccf_record cc CDATA #REQUIRED> <!-- length  2: Country of Casualty -->
<!ATTLIST caccf_record tc CDATA #REQUIRED> <!-- length  2: Type of Casualty -->
<!ATTLIST caccf_record rn CDATA #REQUIRED> <!-- length  5: Reference Number (File Ref. No) -->
<!ATTLIST caccf_record na CDATA #REQUIRED> <!-- length 28: Name (of Casualty) -->
<!ATTLIST caccf_record do CDATA #REQUIRED> <!-- length  4: Date Record Processed (YYMM) -->
<!ATTLIST caccf_record sn CDATA #REQUIRED> <!-- length  9: Social Security OR Service Num. -->
<!ATTLIST caccf_record mg CDATA #REQUIRED> <!-- length  4: Military Grade (Grade or Rate) -->
<!ATTLIST caccf_record pg CDATA #REQUIRED> <!-- length  2: Pay Grade (Grade or Rate) -->
<!ATTLIST caccf_record dd CDATA #REQUIRED> <!-- length  8: Date of Death (MM/DD/YY) (Casualty) -->
<!ATTLIST caccf_record he CDATA #REQUIRED> <!-- length 20: "Home of Record" City (Place) -->
<!ATTLIST caccf_record hs CDATA #REQUIRED> <!-- length  2: Home of Record State Code -->
<!ATTLIST caccf_record oc CDATA #REQUIRED> <!-- length  5: Service Occupation Code -->
<!ATTLIST caccf_record db CDATA #REQUIRED> <!-- length  8: Date of Birth (MM/DD/YY) -->
<!ATTLIST caccf_record rc CDATA #REQUIRED> <!-- length  1: Reason (Cause of Casualty) -->
<!ATTLIST caccf_record ai CDATA #REQUIRED> <!-- length  1: Aircraft or Not Aircraft -->
<!ATTLIST caccf_record ra CDATA #REQUIRED> <!-- length  1: Race -->
<!ATTLIST caccf_record re CDATA #REQUIRED> <!-- length  2: Religion Code (Religious Denom.) -->
<!ATTLIST caccf_record le CDATA #REQUIRED> <!-- length  2: Length of Service in Years -->
<!ATTLIST caccf_record ma CDATA #REQUIRED> <!-- length  1: Marital Status -->
<!ATTLIST caccf_record se CDATA #REQUIRED> <!-- length  1: Sex -->
<!ATTLIST caccf_record ci CDATA #REQUIRED> <!-- length  1: Citizen Code -->
<!ATTLIST caccf_record pp CDATA #REQUIRED> <!-- length  1: Posthumous Promotion -->
<!ATTLIST caccf_record dt CDATA #REQUIRED> <!-- length  6: Date Tour in Southeast Asia -->
<!ATTLIST caccf_record lr CDATA #REQUIRED> <!-- length  1: Last Record Code -->
<!ATTLIST caccf_record br CDATA #REQUIRED> <!-- length  3: Body Recovered or Not -->
<!ATTLIST caccf_record ag CDATA #REQUIRED> <!-- length  2: Age at Time of Casualty -->
<!ATTLIST caccf_record sc CDATA #REQUIRED> <!-- length  1: Component (Service Component) -->
<!ATTLIST caccf_record co CDATA #REQUIRED> <!-- length 29: Comments -->
<!ATTLIST caccf_record ty CDATA #REQUIRED> <!-- length  2: Type -->
<!ATTLIST caccf_record pc CDATA #REQUIRED> <!-- length  2: Province Code (South Vietnam Provinces and Military Regions) -->
<!ATTLIST caccf_record mc CDATA #REQUIRED> <!-- length  2: CORPCD -->
<!ATTLIST caccf_record pr CDATA #REQUIRED> <!-- length  2: PROCD -->
<!ATTLIST caccf_record fl CDATA #REQUIRED> <!-- length  2: Flag -->
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create table CACCF (
    REC_NO            int not null,   -- DICE: no. as found in the source
    MIL_SERVICE       char(1),        -- Military Service (DoD Component)
    CASUALTY_COUNTRY  char(2),        -- Country of Casualty
    CASUALTY_TYPE     char(2),        -- Type of Casualty
    REF_NO            char(5),        -- Reference Number (File Ref. No)
    NAME              char(28),       -- Name (of Casualty)
    PROCESSED         char(4),        -- Date Record Processed (YYMM)
    SSN_SERVICE_NO    char(9),        -- Social Security OR Service Num.
    GRADE             char(4),        -- Military Grade (Grade or Rate)
    PAY_GRADE         char(2),        -- Pay Grade (Grade or Rate)
    DIED              date,           -- Date of Death (MM/DD/YY) (Casualty)
    HOR_CITY          char(20),       -- "Home of Record" City (Place)
    HOR_STATE         char(2),        -- "Home of Record" State Code
    OCCUPATION        char(5),        -- Service Occupation Code
    BORN              date,           -- Date of Birth (MM/DD/YY)
    CASUALTYREASON    char(1),        -- Reason (Cause of Casualty)
    AIR               char(1),        -- Aircraft or Not Aircraft
    RACE              char(1),        -- Race
    RELIGION          char(2),        -- Religion Code (Religious Denom.)
    SERVICE_LENGTH    char(2),        -- Length of Service in Years
    MARITALSTATUS     char(1),        -- Marital Status
    SEX               char(1),        -- Sex
    CITIZEN           char(1),        -- Citizen Code
    PH_PROMOTION      char(1),        -- Posthumous Promotion
    SEA_TOUR          date,           -- Date Tour in Southeast Asia
    LASTRECORD        char(1),        -- Last Record Code
    BODY_RECOVERED    char(3),        -- Body Recovered or Not
    AGE               char(2),        -- Age at Time of Casualty
    COMPONENT         char(1),        -- Component (Service Component)
    COMMENTS          char(29),       -- Comments
    TYPE              char(2),        -- Type
    PROVINCE          char(2),        -- Province Code (South Vietnam Provinces and
                                      -- Military Regions)
    CORPCD            char(2),        -- CORPCD
    PROCD             char(2),        -- PROCD
    FLAG              char(2),        -- Flag
    NOTES             varchar(1000),  -- DICE: notes/corrections made
    -- NOTE(review): key assignment follows the column order of the scanned
    -- figure (PRIMARY KEY line first, UNIQUE line second); confirm against the original.
    PRIMARY KEY       (SSN_SERVICE_NO),
    UNIQUE            (REC_NO)
);
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/* Find records with incomplete BORN or DIED date: */ 

SQL> select REC_NO, NAME, BORN, DIED, AGE from CACCF 

where BORN is null or DIED is null; 

REC NO NAME BORN DIED 



2114 SEVENBERGEN JERRY L 

3882 DOMINGUEZ MICHAEL J 

3883 JORDAN ALLAN H 



10-SEP-66 0 
10-SEP-66 0 



AG 

16-MAR-66 0 



33997 HALIBURTON MICHAEL R 08-AUG-70 0 

17 rows selected. 

/* How many entries do NOT have a value for 'Date Tour in Southeast Asia'? */ 

SQL> select count (*) from CACCF where SEA_TOUR is null; 

COUNT(*) 
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/* What is the number and min/max/average age of the casualties over entries where AGE is 
available? */ 

SQL> select count(*), min(AGE), max(AGE), avg(AGE) 

• from CACCF 

• where not AGE = '0'; 



COUNT(*) 


MI 


MA 


AVG(AGE) 


58164 


16 


62 


22.7932742 
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1 900 TRANSFORMING REPRESENTATION 

OF DATA OBJECTS INTO SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF 
DATA OBJECTS 



ARCHIVING SELF-DESCRIBING, 
INFRASTRUCTURE-INDEPENDENT 
REPRESENTATION OF DATA OBJECTS 
WITH SELF-DESCRIBING, 
INFRASTRUCTURE-INDEPENDENT 
REPRESENTATION OF LOGICAL 
STRUCTURE OF COLLECTION 
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2000 



RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF 
LOGICAL STRUCTURE OF COLLECTION 



2002, 



CREATING QUERY-ABLE MECHANISM 
IN ACCORDANCE WITH LOGICAL 
STRUCTURE OF COLLECTION 



2004 



RETRIEVING SELF-DESCRIBING, 
INFRASTRUCTURE-INDEPENDENT 
REPRESENTATION OF DATA OBJECTS 



2006 



LOADING DATA OBJECTS INTO QUERY- 
ABLE MECHANISM 
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2100 
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<!-- DTD for a tagged congressional resolution: a DOCUMENT holds the CONGRESS
     identification block, the RESOLUTION itself and the ATTESTATION.
     Content models are repeatable choices, so children may occur in any order and number. -->
<!ELEMENT DOCUMENT (CONGRESS | RESOLUTION | ATTESTATION)* >
<!ATTLIST DOCUMENT DOCID CDATA #IMPLIED>
<!ATTLIST DOCUMENT REFERENCENUMBER CDATA #IMPLIED>
<!ATTLIST DOCUMENT CLASS CDATA #IMPLIED>

<!ELEMENT CONGRESS (NUMBER | SESSIONNUMBER | BODYOFCONGRESS | DATE)* >
<!ELEMENT NUMBER (#PCDATA)* >
<!ELEMENT SESSIONNUMBER (#PCDATA)* >
<!ELEMENT BODYOFCONGRESS (#PCDATA)* >
<!ELEMENT DATE (#PCDATA)* >

<!ELEMENT RESOLUTION (TYPE | STATEMENT)* >
<!ELEMENT TYPE (#PCDATA)* >

<!-- A STATEMENT is an OPENING, any number of STRIKEOUT/INSERT amendments, and a CLOSING. -->
<!ELEMENT STATEMENT (OPENING | STRIKEOUT | INSERT | CLOSING)* >
<!ELEMENT OPENING (#PCDATA)* >
<!ELEMENT STRIKEOUT (#PCDATA)* >
<!ELEMENT INSERT (#PCDATA)* >
<!ELEMENT CLOSING (#PCDATA)* >

<!ELEMENT ATTESTATION (#PCDATA)* >
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<!-- DTD for a tagged act: a DOCUMENT holds the CONGRESS identification block and
     the ACT, which is built from a PURPOSE and nested SECTION / SUBSECTION / PARAGRAPH units. -->
<!ELEMENT DOCUMENT (CONGRESS | ACT)* >
<!ATTLIST DOCUMENT DOCID CDATA #IMPLIED>
<!ATTLIST DOCUMENT REFERENCENUMBER CDATA #IMPLIED>
<!ATTLIST DOCUMENT CLASS CDATA #IMPLIED>

<!-- NOTE(review): the trailing * was not legible in the scan; it is restored so that
     CONGRESS can hold all four children, as in the companion resolution DTD. Confirm. -->
<!ELEMENT CONGRESS (NUMBER | SESSIONNUMBER | BODYOFCONGRESS | DATE)* >
<!ELEMENT NUMBER (#PCDATA)* >
<!ELEMENT SESSIONNUMBER (#PCDATA)* >
<!ELEMENT BODYOFCONGRESS (#PCDATA)* >
<!ELEMENT DATE (#PCDATA)* >

<!ELEMENT ACT (PURPOSE | SECTION)* >
<!ELEMENT PURPOSE (#PCDATA)* >

<!ELEMENT SECTION (HEADING | STATEMENT | SUBSECTION)* >
<!ATTLIST SECTION NUMBER CDATA #IMPLIED>

<!ELEMENT HEADING (#PCDATA | SHORTTITLE)* >
<!ELEMENT SHORTTITLE (#PCDATA)* >

<!ELEMENT STATEMENT (#PCDATA | AMENDMENT | SECTION | ATTESTATION | TEXT)* >

<!ELEMENT AMENDMENT (CODE | STRIKEOUT | INSERT | REDESIGNATE)* >
<!ELEMENT CODE (#PCDATA)* >
<!ELEMENT STRIKEOUT (#PCDATA)* >
<!-- INSERT is recursive: inserted text may itself contain paragraphs, subsections and inserts. -->
<!ELEMENT INSERT (#PCDATA | PARAGRAPH | SUBSECTION | INSERT)* >

<!ELEMENT PARAGRAPH (TOPIC | TEXT | PARAGRAPH)* >
<!ATTLIST PARAGRAPH NUMBER CDATA #IMPLIED>

<!ELEMENT SUBSECTION (TOPIC | PARAGRAPH)* >
<!ATTLIST SUBSECTION NUMBER CDATA #IMPLIED>

<!ELEMENT TOPIC (#PCDATA)* >
<!ELEMENT TEXT (#PCDATA)* >
<!ELEMENT REDESIGNATE (#PCDATA)* >
<!ELEMENT ATTESTATION (#PCDATA)* >
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<DOCUMENT DOCID="f:hc148eas.txt" REFERENCENUMBER="H. CON. RES. 148" 
CLASS="Concurrent Resolution"> 

<CONGRESS> 

<NUMBER>104th CONGRESS</NUMBER> 

<SESSIONNUMBER>2d Session</SESSIONNUMBER> 

<BODYOFCONGRESS>Senate of the United States</BODYOFCONGRESS> 
<DATE>March 21, 1996</DATE> 
</CONGRESS> 

<RESOLUTION> 

<TYPE>AMENDMENTS</TYPE> 

<STATEMENT> 
<OPENING> 

Resolved, That the resolution from the House of Representatives (H. Con. 
Res. 148) entitled "Concurrent resolution expressing the sense of the 
Congress that the United States is committed to military stability in the 
Taiwan Strait and the United States should assist in defending the 
Republic of China (also known as Taiwan) in the event of invasion, 
missile attack, or blockade by the People's Republic of China.", do pass 
with the following 
</OPENING> 

<STRIKEOUT>Strike out all after the resolving clause 
</STRIKEOUT> 
<INSERT> 

That it is the sense of the Congress— 

(1) to deplore the missile tests and military exercises that the 
People's Republic of China is conducting from March 8 through March 25, 
1996, and view such tests and exercises as potentially serious threats to 
the peace, security, and stability of Taiwan and not in the spirit of the 
three United States -China Joint Communiques ; 

(2) to urge the Government of the People's Republic of China to cease 
its bellicose actions directed at Taiwan and enter instead into 
meaningful dialogue with the Government of Taiwan at the highest levels, 
such as through the Straits Exchange Foundation in Taiwan and the 
Association for Relations Across the Taiwan Strait in Beijing, with an 
eye towards decreasing tensions and resolving the issue of the future of 
Taiwan; 

(3) that the President should, consistent with section 3(c) of the 
Taiwan Relations Act of 1979 (22 U.S.C. 3302(c)), immediately consult with 
Congress on an appropriate United States response to the tests and 
exercises should the tests or exercises pose an actual threat to the 
peace, security, and stability of Taiwan; (4) that the President should, 
consistent with the Taiwan Relations Act of 1979 (22 U.S.C. 3301 et 
seq.), reexamine the nature and quantity of defense articles and services 
that may be necessary to enable Taiwan to maintain a sufficient self- 
defense capability in light of the heightened military threat; and 
(5) that the Government of Taiwan should remain committed to the 
peaceful resolution of its future relations with the People's Republic of 
China by mutual decision. 

</INSERT> 

<STRIKEOUT>Strike out the preamble</STRIKEOUT> 
<INSERT> 

Whereas the People's Republic of China, in a clear attempt to intimidate 
the people and Government of Taiwan, has over the past 9 months conducted 
a series of military exercises, including missile tests, within 
alarmingly close proximity to Taiwan; 

Whereas from March 8 through March 15, 1996, the People's Republic of 
China conducted a series of missile tests within 25 to 35 miles of the 2 
principal northern and southern ports of Taiwan, Kaohsiung and Keelung; 
Whereas on March 12, 1996, the People's Republic of China began an 8-day, 
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live -ammunition, joint sea-and-air military exercise in a 2,390 square 
mile area in the southern Taiwan Strait ; 

Whereas on March 18, 1996, the People's Republic of China began a 7-day, 
live -ammunition, joint sea-and-air military exercise between Taiwan's 
islands of Matsu and Wuchu; 

Whereas these tests and exercises are a clear escalation of the attempts 
by the People's Republic of China to intimidate Taiwan and influence the 
outcome of the upcoming democratic presidential election in Taiwan; 
Whereas through the administrations of Presidents Nixon, Ford, Carter, 
Reagan, and Bush, the United States has adhered to a "One China" policy 
and, during the administration of President Clinton, the United States 
continues to adhere to the "One China" policy based on the Shanghai 
Communique of February 27, 1972, the Joint Communique on the 
Establishment of Diplomatic Relations Between the United States of 
America and the People's Republic of China of January 1, 1979, and the 
United States-China Joint Communique of August 17, 1982 ; 

Whereas through the administrations of Presidents Carter, Reagan, and 
Bush, the United States has adhered to the provisions of the Taiwan 
Relations Act of 1979 (22 U.S.C. 3301 et seq.) as the basis for 
continuing commercial, cultural, and other relations between the people 
of the United States and the people of Taiwan and, during the 
administration of President Clinton, the United States continues to 
adhere to the provisions of the Taiwan Relations Act of 1979; 

Whereas relations between the United States and the Peoples' Republic of 
China rest upon the expectation that the future of Taiwan will be settled 
solely by peaceful means; Whereas the strong interest of the United 
States in the peaceful settlement of the Taiwan question is one of the 
central premises of the three United States -China Joint Communiques and 
was codified in the Taiwan Relations Act of 1979; Whereas the Taiwan 
Relations Act of 1979 states that peace and stability in the western 
Pacific "are in the political, security, and economic interests of the 
United States, and are matters of international concern"; 

Whereas the Taiwan Relations Act of 1979 states that the United States 
considers "any effort to determine the future of Taiwan by other than 
peaceful means, including by boycotts, or embargoes, a threat to the 
peace and security of the western Pacific area and of grave concern to 
the United States"; 

Whereas the Taiwan Relations Act of 1979 directs the President to "inform 
Congress promptly of any threat to the security or the social or economic 
system of the people on Taiwan and any danger to the interests of the 
United States arising therefrom"; 

Whereas the Taiwan Relations Act of 1979 further directs that "the 
President and the Congress shall determine, in accordance with 
constitutional process, appropriate action by the United States in 
response to any such danger"; 

Whereas the United States, the People's Republic of China, and the 
Government of Taiwan have each previously expressed their commitment to 
the resolution of the Taiwan question through peaceful means; and 

Whereas these missile tests and military exercises, and the accompanying 
statements made by the Government of the People's Republic of China, call 
into serious question the commitment of China to the peaceful resolution 
of the Taiwan question: Now, therefore, be it. 



</INSERT> 
<CLOSING> 



Amend the title so as to read: "Expressing the sense of 
Congress regarding missile tests and military exercises by the 
People's Republic of China.". 
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</CLOSING> 
</STATEMENT> 
</RESOLUTION> 

<ATTESTATION>Secretary</ATTESTATION> 
</DOCUMENT> 
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<!_ ==== 

Auth 
$ld: ; 



1. COMBINES amico-objects.dtd and amico-media.dtd into a 
SINGLE DTD. 

2. Defines an ADDITIONAL LEVEL on top of the AMICO elements, 
corresponding to the abstract classification of AMICO elements 

in the data dictionary: 

am_classification?, What is it? 
am_titles, What is it called? 

am_appearance?, What does it look like? 

am_creators, Who made it? 
am_creation_date, When was it made? 
am_creation_place?, Where was it made? 
am_context?, What is it about? 

am_criticism?, What does it mean? 

am_exhibitions?, Who showed it? 
am_owners, Who owned it? 

am_related_works, What is it related to? 
am_documenters?, Who documented it? 
am_version_control?, Version Control? 
am_media_metadata Media Metadata 



32C 



NAMING CONVENTIONS FOR ELEMENTS: 

The AMICO data dictionary has a 32A 
o TAG column defining a 3-letter abbreviation, and a 

o AMICO-FIELD column with the full name of the field. 326 
These two components make up the XML tag name as follows 

o XYZ__AMICO_FIELD_NAME 
For elements which are *groups*, i.e., contain subelements, 
just one underscore is used. 32D 
Example: 
o CLG_classification (group) 
o OTY__object_type (atomic element) 

32G 
32H 
32I 
32J 

NOTE: OLC is not modeled, as it is listed under * deleted fields* 

32K 



NOTE: OLC is not modeled, as it is listed under 'deleted fields' 

MULTIPLICITY OF ELEMENTS IN DTDs: 
* = zero-or-more 
+ = one-or-more 
? = zero-or-one 
otherwise: exactly one 
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<!-- Top level of the combined AMICO DTD: a collection of am_object entries,
     each grouping the AMICO fields under abstract "am_" categories.
     NOTE(review): atomic field names are written TAG__field_name (two underscores)
     and groups TAG_name (one underscore), as inferred from the naming-convention
     text; the scan shows a blank where the double underscore is assumed. Confirm. -->
<!ELEMENT am_objects (am_object*) >
<!ELEMENT am_object (
    AID__amico_identifier?,
    am_classification?,
    am_titles,
    am_appearance?,
    am_creators,
    am_creation_dates,
    am_creation_place?,
    am_context?,
    am_criticism?,
    am_exhibitions?,
    am_owners,
    am_related_works?,
    am_documenters?,
    am_version_control?,
    am_media_metadata
)>

<!ELEMENT am_classification (
    OTY__object_type*,
    OPP__object_parts_pieces*,
    CLG_classification*
)>

<!ELEMENT CLG_classification (
    CLS__classification_scheme?,
    CLT__classification_term?
)>

<!ELEMENT am_titles (
    OTG_object_title_name+,
    OST__state*,
    OEN_edition*
)>

<!ELEMENT OTG_object_title_name (
    OTN__object_title_name?,
    OTT__title_type*
)>
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<!-- Appearance and creator groups of the AMICO DTD.
     NOTE(review): atomic field names are written TAG__field_name (two underscores);
     the scan shows a blank where the double underscore is assumed. Confirm. -->
<!ELEMENT am_appearance (
    OPD__physical_description?,
    OPA__physical_orientation_arrangement?,
    MET__measurements_text*,
    MEG_measurements*,
    OMG_materials_and_techniques*,
    OIN__inscriptions_and_or_marks*,
    OCH__condition_examination_history*,
    OTH__treatment_conservation_history*
)>

<!ELEMENT MEG_measurements (
    MCM__measurement_component_measured?,
    MED__measurement_dimension?,
    MDV__measurement_dimension_value?,
    MDU__measurement_dimension_units?,
    MEQ__measurement_qualifier?
)>

<!ELEMENT OMG_materials_and_techniques (
    OMD__materials_and_techniques_description?,
    OMT__materials_and_techniques_process_technique_term*,
    OMM__materials_and_techniques_materials_term*,
    OMS__materials_and_techniques_support*
)>

<!ELEMENT am_creators (
    CRG_creator*
)>

<!ELEMENT CRG_creator (
    CRQ__creator_qualifier?,
    CRT__creator_name_text,
    CRN__creator_name?,
    CRC__creator_culture_nationality?,
    CDT__creator_dates_locations_text?,
    CBD__creator_birth_date?,
    CBP__creator_birth_place?,
    CBQ__creator_birth_qualifier?,
    CDD__creator_death_date?,
    CDP__creator_death_place?,
    CDQ__creator_death_qualifier?,
    CAD__creator_active_date?,
    CAP__creator_active_place*,
    CGN__creator_gender?,
    CRB__creator_biography?,
    CRR__creator_role*,
    CNO__creator_notes?
)>
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<!-- Creation, context, criticism, exhibition and ownership groups of the AMICO DTD.
     The am_context and CXG_context declarations were split across the scanned page
     and are reassembled here; declaration order has no effect in a DTD.
     NOTE(review): atomic field names are written TAG__field_name (two underscores);
     the scan shows a blank where the double underscore is assumed. Confirm. -->
<!ELEMENT am_creation_dates (
    OCG_creation_dates*
)>

<!ELEMENT OCG_creation_dates (
    OCT__creation_date_text,
    OCS__creation_date_start?,
    OCE__creation_date_end?,
    OCQ__creation_date_qualifier?
)>

<!ELEMENT am_creation_place (
    OCP__creation_place*
)>

<!ELEMENT am_context (
    STG_style_period*,
    SUG_subject_matter*,
    CXG_context*
)>

<!ELEMENT STG_style_period (
    STD__style_period_description?,
    STT__style_period_terms*
)>

<!ELEMENT SUG_subject_matter (
    SUP__subject_matter_preiconographic_description?,
    SUI__subject_matter_iconography*,
    SUT__subject_matter_index_terms*
)>

<!ELEMENT CXG_context (
    CXD__context_description?,
    CXP__context_related_person*,
    CXS__context_related_site_place*,
    CXT__context_time_period_dates?
)>

<!ELEMENT am_criticism (
    OCR__critical_responses*
)>

<!ELEMENT am_exhibitions (
    OEH__exhibition_or_loan_history*
)>

<!ELEMENT am_owners (
    OOG_owner*,
    OPO__provenance_prior_owners_text*,
    ORG_rights_copyright*
)>
<!-- Owner group: mandatory name, then places, accession numbers and at least one credit line. -->
<!-- NOTE(review): the occurrence marker after owner_place is illegible in the listing;
     "*" is assumed by analogy with the same garbling elsewhere. -->
<!ELEMENT OOG_owner (
    OON owner_name,
    OOP owner_place*,
    OOA owner_accession_number*,
    OOC owner_credit_line+
)>




<!-- Rights group: mandatory copyright statement and an optional copyright link. -->
<!ELEMENT ORG_rights_copyright (
    ORS copyright_statement,
    ORL copyright_link?
)>



<!-- Related works: groups for related works of art, images, multimedia and documents. -->
<!ELEMENT am_related_works (
    RWG_related_works_of_art*,
    RIG_related_images*,
    RMG_related_multimedia*,
    RDG_related_documents*
)>

<!-- Related work of art: optional description, relationship type and identifier link. -->
<!ELEMENT RWG_related_works_of_art (
    RWD related_works_description?,
    RWR related_works_relationship_type?,
    RWL related_works_identifier_link?
)>

<!-- Related image: optional preferred flag, description, relationship type and identifier link. -->
<!ELEMENT RIG_related_images (
    RIP related_image_preferred?,
    RID related_image_description?,
    RIR related_image_relationship_type?,
    RIL related_image_identifier_link?
)>

<!-- Related multimedia: optional description, relationship type and identifier link. -->
<!ELEMENT RMG_related_multimedia (
    RMD related_multimedia_description?,
    RMR related_multimedia_relationship_type?,
    RML related_multimedia_identifier_link?
)>

<!-- Related document: optional description, relationship type and identifier link. -->
<!ELEMENT RDG_related_documents (
    RDD related_document_description?,
    RDR related_document_relationship_type?,
    RDL related_document_identifier_link?
)>

<!-- Documenters: any number of documentation/cataloguing history groups. -->
<!ELEMENT am_documenters (
    DCG_documentation_cataloguing_history*
)>



<!-- Cataloguing history group: who documented the record and when (both optional). -->
<!ELEMENT DCG_documentation_cataloguing_history (
    DCB documented_cataloged_by?,
    DCD documented_cataloged_date?
)>
<!-- Version control: validation date, dictionary version, processing notes, deletion flag, library year. -->
<!-- NOTE(review): the tag printed as "AW" is read as "AVV"; every other tag has three letters. -->
<!ELEMENT am_version_control (
    AVD amico_validated_date?,
    AVV validation_dictionary_version?,
    ADP amico_data_processing*,
    DEL amico_deletion_flag?,
    ALY amico_library_year?
)>

<!-- ======== MEDIA METADATA ====================== -->

<!-- Media metadata record: Dublin-Core-style (dc_*) and AMICO-specific fields;
     at least one rights entry (XRS) is required. -->
<!-- NOTE(review): the tag printed as "XW" is read as "XVV"; every other tag has three letters. -->
<!ELEMENT am_media_metadata (
    XID dc_resource_identifier?,
    XTI dc_title*,
    XCN_dc_creator*,
    XDE dc_description*,
    XPU dc_publisher?,
    XDN_dc_contributor*,
    XDA dc_date*,
    XRT dc_resourcetype?,
    XAM amico_mode?,
    XFO_dc_format*,
    XRE_dc_relation*,
    XRS dc_rights+,
    XMN amico_media_note?,
    XVD amico_metadata_validation_date?,
    XVV amico_data_dictionary_version?,
    XPR metadata_data_processing_note*,
    XDL metadata_delition_flag?,
    XLY metadata_library_year?
)>

<!-- Media creator group: personal names, corporate names and roles (all repeatable). -->
<!ELEMENT XCN_dc_creator (
    XCP dc_creator_personalname*,
    XCC dc_creator_corporatename*,
    XCR dc_creator_role*
)>

<!-- Media contributor group: personal names, corporate names and roles (all repeatable). -->
<!ELEMENT XDN_dc_contributor (
    XDP dc_contributor_personalname*,
    XDC dc_contributor_corporatename*,
    XDR dc_contributor_role*
)>

<!-- Media format group: encoding, colour palette, colour metric, dimensions, file size, compression. -->
<!ELEMENT XFO_dc_format (
    XFE amico_format_encoding?,
    XFP amico_format_colorpalette?,
    XCM amico_format_colormetric?,
    XFD amico_format_dimensions?,
    XFF amico_format_filesize?,
    XFC amico_format_compression?
)>
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5300 



RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT, OR EXECUTABLE 
SPECIFICATION OF ONE OR MORE 
TRANSFORMATIONS 



5302 



RETRIEVING FROM ARCHIVE ONE OR 
MORE DATA OBJECTS FROM THE 
COLLECTION 



5304 



EXECUTING THE SPECIFICATION TO 
AUTOMATICALLY PLACE THE ONE OR 

MORE DATA OBJECTS INTO A FORM 
SUITABLE FOR MIGRATION TO A NEW 
MEDIUM 
<!-- Media relation group: optional relation type and relation identifier. -->
<!ELEMENT XRE_dc_relation (
    XRY dc_relation_type?,
    XRI dc_relation_identifier?
)>



<!-- ========= ATOMIC ELEMENTS ========= -->
<!-- Each atomic element carries character data only. -->
<!-- NOTE(review): "AW" in the listing is read as "AVV" (all tags have three letters);
     the content model of CRR was lost at a page break and is restored as (#PCDATA). -->

<!ELEMENT ADP amico_data_processing (#PCDATA)>
<!ELEMENT AID amico_identifier (#PCDATA)>
<!ELEMENT ALY amico_library_year (#PCDATA)>
<!ELEMENT AVD amico_validated_date (#PCDATA)>
<!ELEMENT AVV validation_dictionary_version (#PCDATA)>
<!ELEMENT CAD creator_active_date (#PCDATA)>
<!ELEMENT CAP creator_active_place (#PCDATA)>
<!ELEMENT CBD creator_birth_date (#PCDATA)>
<!ELEMENT CBP creator_birth_place (#PCDATA)>
<!ELEMENT CBQ creator_birth_qualifier (#PCDATA)>
<!ELEMENT CDD creator_death_date (#PCDATA)>
<!ELEMENT CDP creator_death_place (#PCDATA)>
<!ELEMENT CDQ creator_death_qualifier (#PCDATA)>
<!ELEMENT CDT creator_dates_locations_text (#PCDATA)>
<!ELEMENT CGN creator_gender (#PCDATA)>
<!ELEMENT CLS classification_scheme (#PCDATA)>
<!ELEMENT CLT classification_term (#PCDATA)>
<!ELEMENT CNO creator_notes (#PCDATA)>
<!ELEMENT CRB creator_biography (#PCDATA)>
<!ELEMENT CRC creator_culture_nationality (#PCDATA)>
<!ELEMENT CRN creator_name (#PCDATA)>
<!ELEMENT CRQ creator_qualifier (#PCDATA)>
<!ELEMENT CRR creator_role (#PCDATA)>
<!ELEMENT CRT creator_name_text (#PCDATA)>

<!-- Atomic elements CXD..OIN: character data only. -->
<!ELEMENT CXD context_description (#PCDATA)>
<!ELEMENT CXP context_related_person (#PCDATA)>
<!ELEMENT CXS context_related_site_place (#PCDATA)>
<!ELEMENT CXT context_time_period_dates (#PCDATA)>
<!ELEMENT DCB documented_cataloged_by (#PCDATA)>
<!ELEMENT DCD documented_cataloged_date (#PCDATA)>
<!ELEMENT DEL amico_deletion_flag (#PCDATA)>
<!ELEMENT MCM measurement_component_measured (#PCDATA)>
<!ELEMENT MDU measurement_dimension_units (#PCDATA)>
<!ELEMENT MDV measurement_dimension_value (#PCDATA)>
<!ELEMENT MED measurement_dimension (#PCDATA)>
<!ELEMENT MEQ measurement_qualifier (#PCDATA)>
<!ELEMENT MET measurements_text (#PCDATA)>
<!ELEMENT OCE creation_date_end (#PCDATA)>
<!ELEMENT OCH condition_examination_history (#PCDATA)>
<!ELEMENT OCP creation_place (#PCDATA)>
<!ELEMENT OCQ creation_date_qualifier (#PCDATA)>
<!ELEMENT OCR critical_responses (#PCDATA)>
<!ELEMENT OCS creation_date_start (#PCDATA)>
<!ELEMENT OCT creation_date_text (#PCDATA)>
<!ELEMENT OEH exhibition_or_loan_history (#PCDATA)>
<!ELEMENT OEN edition (#PCDATA)>
<!ELEMENT OIN inscriptions_and_or_marks (#PCDATA)>
<!-- Atomic elements OMD..RIP: character data only.
     The leading "<!" of every declaration on this page was lost in extraction and is restored. -->
<!ELEMENT OMD materials_and_techniques_description (#PCDATA)>
<!ELEMENT OMM materials_and_techniques_materials_term (#PCDATA)>
<!ELEMENT OMS materials_and_techniques_support (#PCDATA)>
<!ELEMENT OMT materials_and_techniques_process_technique_term (#PCDATA)>
<!ELEMENT OOA owner_accession_number (#PCDATA)>
<!ELEMENT OOC owner_credit_line (#PCDATA)>
<!ELEMENT OON owner_name (#PCDATA)>
<!ELEMENT OOP owner_place (#PCDATA)>
<!ELEMENT OPA physical_orientation_arrangement (#PCDATA)>
<!ELEMENT OPD physical_description (#PCDATA)>
<!ELEMENT OPO provenance_prior_owners_text (#PCDATA)>
<!ELEMENT OPP object_parts_pieces (#PCDATA)>
<!ELEMENT ORL copyright_link (#PCDATA)>
<!ELEMENT ORS copyright_statement (#PCDATA)>
<!ELEMENT OST state (#PCDATA)>
<!ELEMENT OTH treatment_conservation_history (#PCDATA)>
<!ELEMENT OTN object_title_name (#PCDATA)>
<!ELEMENT OTT title_type (#PCDATA)>
<!ELEMENT OTY object_type (#PCDATA)>
<!ELEMENT RDD related_document_description (#PCDATA)>
<!ELEMENT RDL related_document_identifier_link (#PCDATA)>
<!ELEMENT RDR related_document_relationship_type (#PCDATA)>
<!ELEMENT RID related_image_description (#PCDATA)>
<!ELEMENT RIL related_image_identifier_link (#PCDATA)>
<!ELEMENT RIP related_image_preferred (#PCDATA)>
<!-- Atomic elements RIR..SUT: character data only.
     The leading "<!" of every declaration on this page was lost in extraction and is restored. -->
<!ELEMENT RIR related_image_relationship_type (#PCDATA)>
<!ELEMENT RMD related_multimedia_description (#PCDATA)>
<!ELEMENT RML related_multimedia_identifier_link (#PCDATA)>
<!ELEMENT RMR related_multimedia_relationship_type (#PCDATA)>
<!ELEMENT RWD related_works_description (#PCDATA)>
<!ELEMENT RWL related_works_identifier_link (#PCDATA)>
<!ELEMENT RWR related_works_relationship_type (#PCDATA)>
<!ELEMENT STD style_period_description (#PCDATA)>
<!ELEMENT STT style_period_terms (#PCDATA)>
<!ELEMENT SUI subject_matter_iconography (#PCDATA)>
<!ELEMENT SUP subject_matter_preiconographic_description (#PCDATA)>
<!ELEMENT SUT subject_matter_index_terms (#PCDATA)>

<!-- ======== MEDIA METADATA ===================== -->

<!ELEMENT XAM amico_mode (#PCDATA)>
<!ELEMENT XCC dc_creator_corporatename (#PCDATA)>
<!ELEMENT XCM amico_format_colormetric (#PCDATA)>
<!ELEMENT XCP dc_creator_personalname (#PCDATA)>
<!ELEMENT XCR dc_creator_role (#PCDATA)>
<!ELEMENT XDA dc_date (#PCDATA)>
<!ELEMENT XDC dc_contributor_corporatename (#PCDATA)>
<!ELEMENT XDE dc_description (#PCDATA)>
<!ELEMENT XDL metadata_delition_flag (#PCDATA)>
<!ELEMENT XDP dc_contributor_personalname (#PCDATA)>
<!ELEMENT XDR dc_contributor_role (#PCDATA)>
<!ELEMENT XFC amico_format_compression (#PCDATA)>
<!ELEMENT XFD amico_format_dimensions (#PCDATA)>
<!-- Atomic media-metadata elements XFE..XVV: character data only. -->
<!-- NOTE(review): "XW" in the listing is read as "XVV" (all tags have three letters). -->
<!ELEMENT XFE amico_format_encoding (#PCDATA)>
<!ELEMENT XFF amico_format_filesize (#PCDATA)>
<!ELEMENT XFP amico_format_colorpalette (#PCDATA)>
<!ELEMENT XID dc_resource_identifier (#PCDATA)>
<!ELEMENT XLY metadata_library_year (#PCDATA)>
<!ELEMENT XMN amico_media_note (#PCDATA)>
<!ELEMENT XPR metadata_data_processing_note (#PCDATA)>
<!ELEMENT XPU dc_publisher (#PCDATA)>
<!ELEMENT XRI dc_relation_identifier (#PCDATA)>
<!ELEMENT XRS dc_rights (#PCDATA)>
<!ELEMENT XRT dc_resourcetype (#PCDATA)>
<!ELEMENT XRY dc_relation_type (#PCDATA)>
<!ELEMENT XTI dc_title (#PCDATA)>
<!ELEMENT XVD amico_metadata_validation_date (#PCDATA)>
<!ELEMENT XVV amico_data_dictionary_version (#PCDATA)>
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) 



#!/usr/local/bin/perl -w
use strict;

#=============================================
# amico2xml     AMICO records to XML conversion
# Author: Bertram Ludaescher <ludaesch@sdsc.edu>
#
# call:    amico2xml file
# returns: XML-ified version
#
# 2do: extend escXML, handling of binary data
# $Id: amico2xml,v 1.1 1999/04/06 08:43:40 ludaesch Exp $

# Script-wide state shared by the subs below.
my $output;            # handle for current output stream
my @record = ();       # current record (= array of fields)
my $tab = 2;           # current output tab position

sub ind {
    # Build the indentation string for the current tab position:
    # one blank per column held in $tab.
    my $padding = " " x $tab;
    return $padding;
}



# Hash of defined tags: short 3-letter tag => long name (long names currently
# not used for output, see name()).
#
# The long name doubles as the group marker: a long name WITHOUT a blank
# ("CRG_creator") denotes a group, a long name WITH a blank
# ("CRN creator_name") denotes an atomic field; %is_group is derived from that.
#
# NOTE(review): the printed listing is inconsistent about blank vs. underscore
# after the tag. It is normalized here so that exactly the keys of
# %group_members are groups. "AW"/"XW" are read as "AVV"/"XVV" because every
# tag is matched as three capitals. "XRE" is missing from the printed listing
# but is used as a group in %group_members, so it is added -- confirm against
# the original source.
my %long_tag =
    (
     "ADP" => "ADP amico_data_processing",
     "AID" => "AID amico_identifier",
     "ALY" => "ALY amico_library_year",
     "AVD" => "AVD amico_validated_date",
     "AVV" => "AVV validation_dictionary_version",
     "CAD" => "CAD creator_active_date",
     "CAP" => "CAP creator_active_place",
     "CBD" => "CBD creator_birth_date",
     "CBP" => "CBP creator_birth_place",
     "CBQ" => "CBQ creator_birth_qualifier",
     "CDD" => "CDD creator_death_date",
     "CDP" => "CDP creator_death_place",
     "CDQ" => "CDQ creator_death_qualifier",
     "CDT" => "CDT creator_dates_locations_text",
     "CGN" => "CGN creator_gender",
     "CLG" => "CLG_classification",
     "CLS" => "CLS classification_scheme",
     "CLT" => "CLT classification_term",
     "CNO" => "CNO creator_notes",
     "CRB" => "CRB creator_biography",
     "CRC" => "CRC creator_culture_nationality",
     "CRG" => "CRG_creator",
     "CRN" => "CRN creator_name",
     "CRQ" => "CRQ creator_qualifier",
     "CRR" => "CRR creator_role",
     "CRT" => "CRT creator_name_text",
     "CXD" => "CXD context_description",
     "CXG" => "CXG_context",
     "CXP" => "CXP context_related_person",
     "CXS" => "CXS context_related_site_place",
     "CXT" => "CXT context_time_period_dates",
     "DCB" => "DCB documented_cataloged_by",
     "DCD" => "DCD documented_cataloged_date",
     "DCG" => "DCG_documentation_cataloguing_history",
     "DEL" => "DEL amico_deletion_flag",
     "MCM" => "MCM measurement_component_measured",
     "MDU" => "MDU measurement_dimension_units",
     "MDV" => "MDV measurement_dimension_value",
     "MED" => "MED measurement_dimension",
     "MEG" => "MEG_measurements",
     "MEQ" => "MEQ measurement_qualifier",
     "MET" => "MET measurements_text",
     "OCE" => "OCE creation_date_end",
     "OCG" => "OCG_creation_dates",
     "OCH" => "OCH condition_examination_history",
     "OCP" => "OCP creation_place",
     "OCQ" => "OCQ creation_date_qualifier",
     "OCR" => "OCR critical_responses",
     "OCS" => "OCS creation_date_start",
     "OCT" => "OCT creation_date_text",
     "OEH" => "OEH exhibition_or_loan_history",
     "OEN" => "OEN edition",
     "OIN" => "OIN inscriptions_and_or_marks",
     "OMD" => "OMD materials_and_techniques_description",
     "OMG" => "OMG_materials_and_techniques",
     "OMM" => "OMM materials_and_techniques_materials_term",
     "OMS" => "OMS materials_and_techniques_support",
     "OMT" => "OMT materials_and_techniques_process_technique_term",
     "OOA" => "OOA owner_accession_number",
     "OOC" => "OOC owner_credit_line",
     "OOG" => "OOG_owner",
     "OON" => "OON owner_name",
     "OOP" => "OOP owner_place",
     "OPA" => "OPA physical_orientation_arrangement",
     "OPD" => "OPD physical_description",
     "OPO" => "OPO provenance_prior_owners_text",
     "OPP" => "OPP object_parts_pieces",
     "ORG" => "ORG_rights_copyright",
     "ORL" => "ORL copyright_link",
     "ORS" => "ORS copyright_statement",
     "OST" => "OST state",
     "OTG" => "OTG_object_title_name",
     "OTH" => "OTH treatment_conservation_history",
     "OTN" => "OTN object_title_name",
     "OTT" => "OTT title_type",
     "OTY" => "OTY object_type",
     "RDD" => "RDD related_document_description",
     "RDG" => "RDG_related_documents",
     "RDL" => "RDL related_document_identifier_link",
     "RDR" => "RDR related_document_relationship_type",
     "RID" => "RID related_image_description",
     "RIG" => "RIG_related_images",
     "RIL" => "RIL related_image_identifier_link",
     "RIP" => "RIP related_image_preferred",
     "RIR" => "RIR related_image_relationship_type",
     "RMD" => "RMD related_multimedia_description",
     "RMG" => "RMG_related_multimedia",
     "RML" => "RML related_multimedia_identifier_link",
     "RMR" => "RMR related_multimedia_relationship_type",
     "RWD" => "RWD related_works_description",
     "RWG" => "RWG_related_works_of_art",
     "RWL" => "RWL related_works_identifier_link",
     "RWR" => "RWR related_works_relationship_type",
     "STD" => "STD style_period_description",
     "STG" => "STG_style_period",
     "STT" => "STT style_period_terms",
     "SUG" => "SUG_subject_matter",
     "SUI" => "SUI subject_matter_iconography",
     "SUP" => "SUP subject_matter_preiconographic_description",
     "SUT" => "SUT subject_matter_index_terms",
     "XAM" => "XAM amico_mode",
     "XCC" => "XCC dc_creator_corporatename",
     "XCM" => "XCM amico_format_colormetric",
     "XCN" => "XCN_dc_creator",
     "XCP" => "XCP dc_creator_personalname",
     "XCR" => "XCR dc_creator_role",
     "XDA" => "XDA dc_date",
     "XDC" => "XDC dc_contributor_corporatename",
     "XDE" => "XDE dc_description",
     "XDL" => "XDL metadata_delition_flag",
     "XDN" => "XDN_dc_contributor",
     "XDP" => "XDP dc_contributor_personalname",
     "XDR" => "XDR dc_contributor_role",
     "XFC" => "XFC amico_format_compression",
     "XFD" => "XFD amico_format_dimensions",
     "XFE" => "XFE amico_format_encoding",
     "XFF" => "XFF amico_format_filesize",
     "XFO" => "XFO_dc_format",
     "XFP" => "XFP amico_format_colorpalette",
     "XID" => "XID dc_resource_identifier",
     "XLY" => "XLY metadata_library_year",
     "XMN" => "XMN amico_media_note",
     "XPR" => "XPR metadata_data_processing_note",
     "XPU" => "XPU dc_publisher",
     "XRE" => "XRE_dc_relation",
     "XRI" => "XRI dc_relation_identifier",
     "XRS" => "XRS dc_rights",
     "XRT" => "XRT dc_resourcetype",
     "XRY" => "XRY dc_relation_type",
     "XTI" => "XTI dc_title",
     "XVD" => "XVD amico_metadata_validation_date",
     "XVV" => "XVV amico_data_dictionary_version"
    );

sub name {
    # Map a tag to the element name used in the output.
    # Short names are emitted at present; the long-name variant is kept
    # below, disabled, exactly as in the original.
    my ($short_tag) = @_;
    return $short_tag;                  # short
    # return $long_tag{$short_tag};     # long
}

# What tags are groups? A tag is a group exactly when its long name
# contains no blank (see %long_tag).
my %is_group = ();
while (my ($tag, $long) = each %long_tag) {
    $is_group{$tag} = 1 if ($long !~ / /);
}



# Members of each group: group tag => blank-separated list of member tags.
# The inverse mapping (member => group) is derived from this table by
# extracting every run of three capitals from the value.
my %group_members =
    (
     "CLG" => "CLS CLT",
     "CRG" => "CRQ CRT CRN CRC CDT CBD CBP CBQ CDD CDP CDQ CAD CAP CGN CRB CRR CNO",
     "CXG" => "CXD CXP CXS CXT",
     "DCG" => "DCB DCD",
     "MEG" => "MCM MED MDV MDU MEQ",
     "OCG" => "OCT OCS OCE OCQ",
     "OMG" => "OMD OMT OMM OMS",
     "OOG" => "OON OOP OOA OOC",
     "ORG" => "ORS ORL",
     "OTG" => "OTN OTT",
     "RDG" => "RDD RDR RDL",
     "RIG" => "RIP RID RIR RIL",
     "RMG" => "RMD RMR RML",
     "RWG" => "RWD RWR RWL",
     "STG" => "STD STT",
     "SUG" => "SUP SUI SUT",
     "XCN" => "XCP XCC XCR",
     "XDN" => "XDP XDC XDR",
     "XFO" => "XFE XFP XCM XFD XFF XFC",
     "XRE" => "XRY XRI"
    );
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my %group = (); # inverse: returns the group of a member 

# Fill %group, the inverse of %group_members: for every member tag found in
# a group's member string, record which group it belongs to.
while (my ($group, $memstr) = each %group_members) {
    my @members = ($memstr =~ m/[A-Z]{3}/g);   # every 3-capital tag in the list
    foreach (@members) {
        $group{$_} = $group;
    }
}



sub escXML {    # escape characters: EXTEND/FIX!
    # Return a copy of the argument with XML markup characters escaped.
    #
    # "&" is replaced first so the ampersands introduced by the other
    # replacements are not escaped a second time. The work is done on a
    # private copy so the caller's $_ (the current input line in the main
    # loop) is left untouched.
    #
    # NOTE(review): the printed listing also contains six further
    # substitutions whose patterns and replacements are no longer legible
    # (they appear as s/a/a/, s/e/e/, s/n/n/ -- presumably accented
    # characters mapped to entities). They are no-ops as printed and are
    # omitted here; restore them from the original source if available.
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    return $text;
}



sub output_fields {    # output all fields from current record
    # Load the given fields into the shared @record and emit them one by one.
    # output_field() may consume further entries of @record itself (the
    # members of a group), so the loop always continues with whatever is
    # left. A loop is used instead of self-recursion so that long records do
    # not trigger deep-recursion warnings under -w.
    @record = @_;
    while (defined(my $field = shift @record)) {    # get the next field
        &output_field($field);                      # output it (and more if group!)
    }
}

sub output_field {    # output the given field PLUS follow-up group members!
    # A field is a 3-letter tag immediately followed by its data.
    #   - known group tag:  print the opening tag, indent, and let
    #                       output_group() emit the members and closing tag
    #   - known atomic tag: print <TAG>escaped data</TAG>
    #   - unknown tag or unparsable field: wrap it in an <am_ERROR> element
    my ($field) = @_;

    # The tag is anchored at the start of the field, matching get_record(),
    # which takes the first three characters as the tag.
    if ($field =~ /^([A-Z]{3})(.*)/) {          # is it a 3 letter tag + data?
        my ($tag, $data) = ($1, $2);
        if ($long_tag{$tag}) {                  # do we know this tag?
            if ($is_group{$tag}) {              # is it a group?
                print &ind(), "<", &name($tag), ">\n";
                $tab += 2;
                &output_group($tag);            # and output closing tag!
            }
            else {                              # it's a simple recognized tag
                print &ind(), "<", &name($tag), ">",
                      &escXML($data),
                      "</", &name($tag), ">\n";
            }
        } else {                                # don't know this tag!
            print "<am_ERROR type = \"unrecognized tag\">";
            print &ind(), "<$tag>", &escXML($data), "</$tag>\n";
            print "</am_ERROR>\n";
        }
    } else {                                    # don't understand that field!
        # Report the offending field itself (escaped), not whatever $_ holds.
        print "<am_ERROR type = \"unrecognized field\">", &escXML($field);
        print "</am_ERROR>\n";
    }
}

sub output_group {    # output fields WHILE in same group
    # Called right after the opening tag of group $grp has been printed.
    # Consumes fields from the shared @record for as long as they belong to
    # $grp, then prints the group's closing tag. The first field that does
    # not belong to the group (if any) is handed to output_field() after the
    # closing tag.
    #
    # The closing tag is always emitted -- also when the record ends inside
    # the group or the next field carries no recognizable tag -- so the
    # output never contains an unclosed group element.
    my ($grp) = @_;
    my $pending;                                # first field outside the group

    while (defined(my $field = shift @record)) {    # get the next field
        if ($field =~ /^([A-Z]{3})/                 # should be a tag
            and defined $group{$1} and $group{$1} eq $grp) {
            &output_field($field);                  # STAY in the same group
        } else {
            $pending = $field;                      # LEAVE group!
            last;
        }
    }

    $tab -= 2;
    print &ind(), "</", &name($grp), ">\n";     # closing tag for group
    &output_field($pending) if defined $pending;
}



sub get_record {    # read the next record and return first tag
    # Splits the current input line ($_) into the shared @record and returns
    # the first three characters of the first field, or 0 for an empty record.
    #
    # NOTE(review): the field-delimiter pattern is illegible in the printed
    # listing; it is reconstructed here as the two characters "}~" and the
    # end-of-record marker as "|" -- confirm against the input data format.
    chomp;
    @record = split /\}~/;                      # End-Of-Record = "|\n"
    my $last = pop(@record);                    # get EOR
    $last = "" unless defined $last;            # blank line: nothing to pop
    if ($last ne "|") {                         # -and check
        print STDOUT "*** ERROR: unknown delimiter <$last>\n";
    }
    if (@record) {                              # check if non-empty
        return substr($record[0], 0, 3);        # return the tag name
    } else {
        print STDOUT "*** WARNING: empty record\n";
        return 0;
    }
}
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#==== = = = = === = ===========================:== = =: ====== ========== 

# MAIN 

#====================================:======================= 

# Unbuffered output, so the two XML streams and the STDOUT diagnostics are
# written as soon as they are printed.
$| = 1;

# Object records go to amico_objects.xml, media-metadata records to
# amico_media.xml. $! carries the reason when an open fails.
open(AM_OBJS,  ">amico_objects.xml") or die "*** ERROR: cannot open amico_objects.xml: $!\n";
open(AM_MEDIA, ">amico_media.xml")   or die "*** ERROR: cannot open amico_media.xml: $!\n";

print AM_OBJS <<EOF;
<am_objects>
EOF

print AM_MEDIA <<EOF;
<am_media>
EOF

# Disabled variant that also emits a stylesheet processing instruction:
# print AM_OBJS <<EOF;
# <?xml:stylesheet type="text/xsl" href="amico_objects.xsl"?>
# <am_objects>
#EOF
# print AM_MEDIA <<EOF;
# <?xml:stylesheet type="text/xsl" href="amico_media.xsl"?>
# <am_media>
#EOF

# One input line = one record. The tag of the first field decides which
# output stream the record goes to; select() makes that stream the default
# for the print calls in the output_* subs. Diagnostics go to STDOUT
# explicitly for the same reason.
while (<>) {
    if (my $tag = &get_record()) {              # get next record and tag name
        if ($tag eq "AID") {
            select(AM_OBJS);
            print &ind(), "<am_object>\n";
            $tab += 2;
            &output_fields(@record);
            $tab -= 2;
            print &ind(), "</am_object>\n";
        } elsif ($tag eq "XID") {
            select(AM_MEDIA);
            print &ind(), "<am_media_metadata>\n";
            $tab += 2;
            &output_fields(@record);
            $tab -= 2;
            print &ind(), "</am_media_metadata>\n";
        } else {
            print STDOUT "*** ERROR: unknown tag <$tag> in record: <@record>\n";
        }
    } else {
        print STDOUT "*** ERROR: get_record failed\n";
    }
}

print AM_OBJS "</am_objects>\n";
print AM_MEDIA "</am_media>\n";
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Perl Script to convert XML SLA version 
into software independent OAV 
representation, ready to be loaded 
into a variety of engines: 

- Prolog engine, or 

- relational database engine, or 

- XML database engine, or 

- other 



SCRIPT devised by Richard Marciano & 
Bertram Ludaescher & 
Reagan Moore 
# August 20, 2000, copyright RiM + BL + ReM 
# 



# ! /usr/local/bin/perl 
#use strict; 



36F 



36G 



36H 



361 



36J 



n 

# Script-wide counters and collections (the script runs without "use strict",
# so the bare @name; statements merely introduce the package arrays).
@boa; %boatype_hash = (); my $bill_count = 0;
my $amdt_count = 0; my $con_res_count = 0;
my $j_res_count = 0; my $res_count = 0;

# One array plus one value hash per kind of extracted item.
@abstract; %a_value = ();
@congressional_record; %cr_value = ();
@cosponsors; %cs_value = (); @date_introduced;
%di_value = (); @digest; %d_value = ();
@latest_status; %ls_value = (); @status_actions;
%sa_value = (); @official_title; %ot_value = ();
@sponsor; %s_value = (); @statement_of_purpose;
%sop_value = (); @submitted_by; %sb_value = ();
@submitted_for; %sf_value = (); @filename_senator;
@filename_period; @prepared_by; @senator;
@occurrence_section; @occurrence_committee;
@topic_index; %ti_value = ();

# 

my $MORE_SIZE = 20;        # NOTE(review): purpose not visible in this part of the listing
my $bill_index_2 = "";
my %tempH = ();

my $line; my $hl ; my $h2 = "■'; my $h3 ; my $h4; my $h5 ; my $h6_l = ; my $h6_2 



36A 



36B 



36C 



36D 



36E 



36K 



36L 



36M 



36N 



36Q 



36P 



36Q 



36R 



36S 



36T 



36U 



36V 



36W 



36X 



36Y 



36Z 



# Parser state for the file currently being processed.
# NOTE(review): the one-blank initial values of $prev_h2 and $senNAME are kept
# exactly as printed; they may have been empty strings in the original.
my $prev_h2 = " ";
my $section = 0; my $committee = "";
my $senNAME = " "; my $state = ""; my $senid;
my $filename;
my $line_number;
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my ©allfile; 



# Collect every file in the XMLDATA directory whose name contains "_LAR",
# then open the log file.
opendir THISDIR, "XMLDATA" or die "can't find DIRECTORY: $!";
@allfile = grep /_LAR/, readdir THISDIR;
closedir THISDIR;

open( LOG, ">logfile" ) || die " * ERROR : can't open logfile\n";



# 



foreach my $sen (@allfile) { 



#my $sen = "D_$ARGV [ 1] _LAR$ARGV [0] _106 . xml ' 
#my $sen = "D__1CP_LAR_S106_106 .xml " ; 
#my $sen = "D_1CP_LAR_S216_106 . xml " ; 
#my $sen = "D_1_LARI_S272_106 . xml " ; 

$sen =- m/.+\_.+\_.+\_S{\d+)\_.+/; 
$senid = $1; 
$senid = $1; 
$ s enN AM E = " " ; 



$ filename = $sen,- 
$line_number = 0; 



open( SEN106, "XMLDATA/ $ sen" ) | | die " * ERROR : can't open $sen\n" ; 
#open( SEN106, "$sen" ) || die " * ERROR : can't open $sen\n" ; 

&process_header ( $sen ); 

Nl: while ( $line = <SEN106> ) { 
$line_number++ ,- 

N2 : ; 

# DETECT SECTION headers 

if { $line =- m/hidden= "on" >(.+)< \/string>/ ) { 
$hl = $1; 

# <p align="left" bold="on" xstring bold="on n >SECTI0N I. SPONSORED 
MEASURES*;/ string ></p> 

if ( $hl = ~ m/SECTION I\./ ) { 

$section = 1; $h6_l = " n ; $h6_2 = 



my $ov = $senid . "_" . " $line_number " ; 
my $len = $#occurrence_section; 

$occurrence_section [ $len + 1 ] = [ ($senid, $line_number , 
$ sect ion, $hl) ] ; 
} 

elsif ( $hl m/SECTION II\./ ) { 

$section = 2; $h6_l = "«; $h6_2 = ""; 



my $ov = $senid . "_" . " $line_number " ; 
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elsif { $hl =- m/SECTION IV\ . / ) { 
$section = 4 ; 

my $ov = $senid . M _" . " $line_number " ,- 
my $len = $#occurrence_section; 

$occurrence_section [ $len + 1 ] = [ ($senid, $line_number , 
$section, $hl) ] ; 
} 

elsif ( $hl = ~ m/SECTION V\ . / ) { 

$section = 5; $h6_l = ; $h6_2 = »' » ; 

my $ov = $senid . "_" . " $line_number " ; 

my $len = $#occurrence_section; 

$occurrence_sect ion [ $len + 1 ] = [ ($senid, $line_nutnber , 
$section, $hl) ] ; 

} 

elsif ( $hl =- m/SECTION VI\./ ) { 

$section = 6; $h6_l = j $h6_2 = 

my $ov = $senid . . " $line_number " ; 

my $len = $#occurrence_section; 

$occurrence_sect ion [ $len + 1 3 = [ ($senid, $line_number , 
$section / $hl) ] ; 
} 

elsif ( $hl =- m/SECTION VII\./ ) { 

$se'ction = 7; $h6_l = » " ; $h6_2 = " " ; 

my $ov = $senid . . " $line_number " ; 

my $len = $#occurrence_section; 

$occurrence_section [ $len + 1 ] = [ ($senid, $line_number , 
$section, $hl) 3 ; 

&process_index; 

} 

else { 

print LOG "!!! error :C0ULD NOT RECOGNIZE SECTION NUMBER !!!!\n" 

} 



# DETECT **** BILL NUMBERS 

elsif ( $line m/>\*\*\*\* (.+)<\/p>/ ) { 
$h2 = $1; 

$h2 =- s/\s*//g ; 



if ( $prev_h2 ne " " ) { 

# S.123 1 dot 

# S.Amdt.123 2 dots 
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$boatype_hash{ bill }{ $bill }++; 

} 

} 

else { 

if ( $list[l] eq "Amdt" ) { 
$amdt_count++ ; 

my $bill; 

foreach $bill { sort keys % { $tempH{ $prev_h2 } } ) { 
if ( $boatype_hash{ amdt }{ $bill } eq " " ) { 
$boatype_hash{ amdt }{ $bill } = 0; 

} 

$boatype_hash{ amdt }{ $bill }++; 

} 

} 

elsif ( $list[l] eq "Con" ) { 
$con_res_count++ ; 

foreach my $bill { sort keys % { $tempH{ $prev_h2 } } ) 
if ( $boatype_hash{ con_res }{ $bill } eq " " ) 
$boatype_hash{ con_res }{ $bill } = 0; 

} 

$boatype_hash{ con_res }{ $bill }++; 

} 

} 

elsif ( $list[l] eq "J" ) { 
$ j_res_count++ ; 

foreach my $bill ( sort keys % { $tempH{ $prev_h2 } } ) 
if ( $boatype_hash{ j_res }{ $bill } eq " " ) { 
$boatype_hash{ j_res }{ $bill } = 0 ; 

} 

$boatype_hash{ j_res }{ $bill }++; 

} 

) 

elsif ( $list[l] eq "Res" ) { 
$res_count++ ; 

foreach my $bill ( sort keys % { $tempH{ $prev_h2 } } ) 
' if ( $boatype_hash{ res } { $bill } eq "" ) { 
$boatype_hash{ res }{ $bill } = 0; 

} 

$boatype_hash{ res }{ $bill }++; 

} 

} 

else { 

print "$prev_h2\t ERROR in recording boatype_hash\n" ; 

} 

} 

$list = ""; 

} 

$prev_h2 = $h2 ; 
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# SKIP OVER header SECTIONS 
#<header> 

#<p align="center" bold="on" italic="of f "><f ieldxf ldinst> PAGE 

</f Idinstxf ldrslt xstring chars tyname= " " bold="on" italic= "of f " >2</string> 

# </f ldrslt x/fieldx/p> 

#<p align=" right " bold^'on" italic= "of f " xstring bold="on" italic="of f " >Paul 
S. Sarbanes</stringx/p> 

#<p align=" lef t" bold^'on" italic="of f " xstring bold="on" italic= "of f " >SECTION 
IV. COSPONSORED MEASURES</str ingx/p> 

#<p align-" left" bold="on" italic= "of f" xstring bold-"on" 

italic="of f ">&tab;&tab; ORGANIZED BY COMMITTEE REFERRAL</stringx/p> 

#<p align=" lef t " bold="on" italic= "of f " xstring bold-"on" 

italic="of f ">&tab;&tab; SENATE: AGRICULTURE</stringx/p> 

#<p align="left" bold="off" italic= "of f " x/p> 

#</header> 

elsif ( $line =- m/<header/ ) { 
my $ i = 0 ; 

while ( $line = <SEN106> ) { 
$line_number++; 

if ( $line m/<p align- .+>(.+) <\/p>/ ) { 
$h4 = $1; 

$ i + + ; 

if { $i -= 3 && ( $section == 2 | | $section — = 3 | | $section 

= = ,4 ) ) { 

$h4 =- m/SECTION (.+)\. . +<\/string>/ ,- 
$h5 = $1; 

if ( $h5 eq "III" ) { 
$committee = 3 ; 

} 

elsif ( $h5 eq "IV" ) { 
$ commit tee = 4; 

} 

else { 

$ committee - ""; 

} 

} 

elsif ( $i == 5 ScSc { $committee == 3 | | $committee == 4 ) ) { 
$h4 = - m/SECTION <-+)\. . +<\/string>/ ; 
$h4 =- m/&tab; &tab; \s+ ( . +) <\/string>/ ; 
$h6_l = "COMMITTEE" ; 
$h6_2 = "$1" ; 

my $ov = $senid . "_" . " $1 ine_number " ; 

# $occurrence_value{ $ov } = ""; 

my $len = $#occurrence_committee ; 
$occurrence_committee [ $len + 1 ] = [ <$senid, 
$line_number , $committee / $h6_2 ) ]; 

} 

} 
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my $ov = $senid . "_" . " $line_number " ; 

$di_value{ $val } = " " ; 

my $len - $#date_introduced; J 

$date introducedt $len + 1 ] - [ ($senid, $line_number , $val) ]; 



if ( $section == 1 | | $section == 3 ) { 
$tempH{ $h2 } { SPONSOR} = "$senid"; 



my $ov = $senid . . ; 

my $s - $senid; 
$s =- s/ /_/g; 
$s_value{ $s } = 

my $len - $#sponsor; 

$sponsor[ $len + 1 ] = [ ($senid, $s) ] ,- 



} 

elsif ( $section == 2 | | $section == 4 ) { 
$line = <SEN106>; 
$ 1 i ne_numbe r + + ; 

<p bold="off" italic="of f ">SPONSOR: Daschle</p> 
if ( $line =- m/<p .+>(.+): (.+)<\/p>/ ) { 
my $mysponsor = $1; 
my $value = $2; 
$tempH{ $h2 } { SPONSOR} ^ $value; 



my $ov = $senid . "_" . " $line_number " ; 

my $s = $value; 
$s =- s/ /_/g; 
$s_value{ $s } = 

my $len - $#sponsor; 

$sponsor[ $len + 1 ] = [ ($senid / $line_number , $s) 3 ; 



} 



'} 



elsif { $section -= 5 ) { 

<p>SUBMITTED FOR: S- 4 &tab ; CONGRESSIONAL RECORD: S1830</p> 

$line = <SEN106>; 
$line_number+-*- ; 

if ( $line m/<p> ( . + ) <\/p>/ ) { 
my $ submit - $1; 
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my $ov = $senid . "_" . " -l"; 



my $s = $senid; 
$s — s/ /_/g; 
$s_value{ $s } = 



my $len = $#sponsor; 

$sponsor[ $len + 1 ] = [ ($senid, $s) ]; 



$line = <SEN106>; 
$line_number++ ; 

if { $line =- m/<p>(.+): (.+)<\/p>/ ) { 
my $ submi 1 t ed_by = $ 1 ; 
my $ value = $2; 

$tempH{$h2} {SUBMITTED_BY} = $value; 



my $ov = $senid . . " $line_number " ; 



my $sb = $value; 
$sb = - s/ /_/g; 
$sb_value{ $sb } = 

my $len = $#submitted_by ; 

$ submi t ted_by [ $len + 1 ] = [ ($senid, $line_number , $sb) 



} 

elsif ( $line =- m/<p align= .+>(.+) <\/p>/ ) { 
$h3 = $1; 
goto N4 ; 

} 



elsif ( $section 6) { 

# <p> SUBMITTED FOR: S. 4 &tab ; CONGRESSIONAL RECORD : S1830</p> 

# <p>SPONSOR: Murray</p> 

# <p>SUBMITTED BY: Bingaman< /p> 
$line - <SEN106>; 

$ 1 ine_number + + ; 

if { $line =- m/<p> ( . + ) <\/p>/ ) { 
my $submit = $1; 
. my ($partl, $part2) = split (/&tab;/ , $submit) ; 
my ($partl_l, $partl_2) = splitf/: /, $partl) ; 
my {$part2_l, $part2_2) = split(/: /, $part2) ; 



$partl_2 =- s/\s*//g ; 

$tempH{$h2} {SUBMITTED_FOR} = $partl_2; 
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$tempH{$h2 } {CONGRESS I ONAL_RECORD} = $part2_2 ; 



$ov = $senid . "_" . M $line_number f ' ; 

my $cr - $part2_2; 
$cr = ~ s/ /_/g; 
$cr_value{ $cr } = " " ,- 

$len = $#congressional_record; 
## $congressional_record [ $len + 1 ] = [ ($senid, 

$line_number / $cr, $h2) ] ,- 

$congressional_record [ $len + 1 3 = [ ($senid, 
$line_number , $cr) ] ; 
tt 

} 

$line = <SEN106>; 
$line_number++ ; 

if ( $line =~ m/<p>(.+): (.+)<\/p>/ ) { 
my $mysponsor - $1; 
my $ value = $2 ; 

$tempH{$h2} {SPONSOR} = $value; 



my $ov = $senid . . " $line_number" ; 

my $s = $value; 
$s — s/ /_/g; 
$s_value{ $s } = ""; 

my $len = $# sponsor; 

$ sponsor [ $len + 1 ] - [ ($senid, $line_number , $s) ] 



} 

$line = <SEN106>; 
$line_number++ ,- 

if ( $line =- m/<p>(.+): (.+)<\/p>/ ) { 
my $submitted_by - $1; 
my $value = $2; 

$ tempH { $h2 } { SUBMITTED_BY } = $value ; 



my $ov = $senid . "_" . M $line_number" ; 

my $sb = $ value; 
$sb = ~ s/ /_/g; 
$sb_value{ $sb } = ""; 

my $len = $#submitted_by ; 
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N4:if { $h3 eq "COSPONSORS" ) { 
# <p bold= n off" italic= n of f ":>Edwards; Bayh; Kerry; Bingaman (A- 

11/05/1999) :</p> 

$line = <SEN106>; 
$1 ine_number++ ; 

if ( $line =- m/<p ,+>(.+) <\/p>/ ) { 
my $mycosponsors - $1; 

if ( ($mycosponsors ne " M ) && ( $mycosponsors ne "NONE") ) { 
$tempH { $h2 } { COSPONSORS } = $mycosponsors ,- 



my $ov - $senid . . " $line_number " ,- 

my $cs = $mycosponsors; 
$cs = ~ s/ /_/g; 
$cs_value{ $cs } - " " ; 

my $len = $#cosponsors ; 

$cosponsors [ $len + 1 ] = [ ($senid, $line_number , $cs) ] 



} 

else { 

$ tempH { $h2 } { COSPONSORS } = " NONE" ; 



my $ov = $senid . "_" - " $line_number " ; 

my $cs = $ my cos pons or s ; 
$cs =- s/ /_/g; 
$cs_value{ $cs } = " " ; 

my $len = $#cosponsors ; 

$cosponsors [ $len + 1 ] = [ ($senid, $line_number , $cs) ] 



} 



} 

} 

else { 

print LOG "!!! error : $senid : $h2 : $h3 COSPONSORS tag ! ! ! ! \n" ; 

} 



elsif ( $h3 eq "OFFICIAL TITLE" ) { 

<p bold="off" italic="of f "> </p> 

$line = <SEN106>; 
$ 1 i ne_nutnbe r + + ,- 

if ( $line =- m/<p .+>(.+)< \/p>/ ) { 
my $title = $1; 

$tempH { $h2 } { OFFICIAL_TITLE } = $title; 



my $ov = $senid . "_" - " $line_number " ; 
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elsif ( $h3 eq "LATEST STATUS" | | $h3 eq "STATUS ACTIONS" ) { 

# <p bold="of f "xstring bold= "on" >Oct 25, 1999&tab; Became Public 
Law No: 106-80 . </stringx/p> 

# <pxstring>May 27, 1999&tab; Proposed amendment S.Amdt. 3 87 
withdrawn in Senate . </stringx/p> 

# <pxstring>May 27, 1999&tab; Proposed by Senator Levin for Senator 
Sarbanes . </stringx/p> 

# <p align="center" italic= "of f " >ABSTRACT</p> 
my $cumulative_content = " " ; 

my $i = 0; 

my $ s ave_l ine_numbe r ; 
while ( $line = <SEN106> ) { 
$1 ine_number++ ; 

if ( $i == 0 ) { $save_line_number = $1 ine_number ; } 
$ i + + ; 

if ( $line =- m/<p .* xstring .*>(.+) <\/string><\/p>/ ) { 
my $content = $1; 

$cumulat ive_content .= "$content CCCRRR " ; # replace \n 

with » CCCRRR " 

} 

else { 

if ( $h3 eq "LATEST STATUS " ) { 

$ tempH { $h2 } { LATEST_STATUS } = $cumulative_content ; 

} 

elsif ( $h3 eq "STATUS ACTIONS" ) { 

$tempH { $h2 } {STATUS_ACTIONS } = $cumulat ive_content ; 

} 

# 



my $ov = $senid . "_" . " $line_number" ; 

if ( $h3 eq "LATEST STATUS" ) { 

my $status = $cumulative_content ; 
$status =- s/ /_/g; 
$ls_value{ $status } = 



my $len - $#latest_status ; 

$latest_status [ $len + 1 ] = [ ($senid, $line_number , 

$status) ) ; 

} 

elsif { $h3 eq "STATUS ACTIONS" ) { 
my $status = $cumulative_content ; 
$status =- s/ /_/g; 
$sa_value{ $status } = ""; 



$status) ] 



my $len = $#status_act ions ; 

$status actions [ $len + 1 ] = [ ($senid / $line_number , 



} 

goto N2 ; 
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} 

elsif ( $h3 eq "ABSTRACT" ) { 
<p italic="of f ">NONE</p> 
$line = <SEN106>; 
$ 1 ine_number++ ; 

if { $line =- m/<p. *>(.+) <\/p>/ ) { 
my $abstract = $1 ; 

$tempH{$h2} {ABSTRACT} = $abstract ; 



my $ov - $senid . "_" 

my $a = $abstract ,- 
$a =- s/ /_/g; 
$a_value{ $a } = 

my $len - $#abstract; 
$abstract [ $len + 1 ] 

# 

} 

else { 

print LOG " ! ! ! error : $senid : $h2 : $h3 ABSTRACT tag ! ! ! !\n" 

} 

} 

elsif ( $h3 eq "STATEMENT OF PURPOSE" ) { 



# <p align=" center" italic= "of f " >STATEMENT OF PURPOSE</p> 

# <p italic= "of f " > . . . </p> 

# OR 

# <p align="center" italic= "of f " >ABSTRACT</p> 

# OR 

# </section> 



$line <SEN106>; 
$ 1 ine_number++ ; 
# 

my $ov - $senid . "_" . 11 $1 ine_number " ; 
# 

if ( $line m/<p italic= .+>(.*) <\/p>/ ) { 
my $stmt = $1; 

if ( $stmt eq "" ) { 

$tempH { $h2 } { S TAT EMENT_OF_PUR POSE } = $Stmt; 

} 

else { 

$tempH { $h2 } { S TAT EMENT_OF_PUR POSE } = $stmt; 

} 

# 

my $sop = $stmt,- 
$sop =- s/ /_/g; 



. " $ 1 ine_number " ; 



= [ (Ssenid, $line_number , $a) ] ; 
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my $sop = " " ; 
$sop_value{ $sop } = '*"; 

my $len = $#statement_of_purpose ; 
$state'ment_of _ purpose [ $len + 1 ] - [ ($36111(5, $line_number , 



! ! ! ! \n n 



#- 



goto N2 ; 

} 

else { 

print LOG "!!! error : $senid: $h2 : $h3 STATEMENT OF PURPOSE tag 

} 

} 

elsif ( $h3 eq "DIGEST" ) { 

<p italic=«'of f ">NONE</p> 
$line = <SEN106>; 
$line_number++ ; 

if ( $line = - m/<p .+>(.+) <\/p>/ ) { 
my $mydigest = $1; 
$tempH{$h2} {DIGEST} = $mydigest; 



#- 



my $ov - $senid . . " $line_number" ; 

my $d = $mydigest; 
$d =- s/ /_/g; 
$d_value{ $d } = ""; 

my $len = $#digest; 

$digest [ $len + 1 ] = [ ($senid, $line_number , $d) ]; 



} 



} 

else { 

print LOG " ! ! ! error : $senid: $h2 : $h3 DIGEST tag ! ! ! ! \n" ; 

} 



else { 

print LOG M ! ! ! error : $senid : $h2 : $h3 UNKNOWN tag !!!!\n" ; 

} 

} 

} # END WHILE 

THEEND: ; 

$prev_h2 = $h2 ; 

} # comment out foreach loop 
my $buf f = "" ; 
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PROCS : 



if( $inputstr eq 




) 


\ yOtO MlbWU; } 








elsif ( 


$ input str 


eq 


II Vi II \ 

o ) 


{ Scprint table ( 


"boa" ) ; } 






elsif ( 


$ input str 


ec J 


tl -a if \ 

« / 


{ &print table ( 


"abstract" ) ; } 






elsif ( 


$inputstr 


eq 


" ha " ) 


{ &print_hash ( 


"a_value" ) ; } 






elsif ( 


$ input st r 


eq 


"cr" ) 


{ &print_table ( 


"congressional_record" 




elsif { 


$ input str 


eq 


"her" ) 


{ Scprint hash ( 


"cr_value" ) ; } 






elsif { 


$ input str 


eq 


"cs" ) 


{ &print_taDle ( 


"cosponsors" ) ; } 






elsif ( 


$ input str 


eq 


"hes" ) 


{ &print_hash ( 


"cs_value" ) ; } 






elsif ( 


$inputstr 


eq 


ai ' ) 


{ &pr int_table ( 


"date_introduced" 


); } 




elsif ( 


$inputstr 


eq 


" hdi 11 ) 


{ &print_hash ( 


"di_value" ) } 






elsif ( 


$inputstr 


eq 


" d " ) 


{ &print_table ( 


"digest" ); } 






elsif ( 


$ input str 


eq 


" hd" ) 


{ &print_hash ( 


"d_value" ); } 






elsif { 


$ input str 


eq 


"Is" ) 


{ &print_table ( 


"latest_status" ) 


; } 




elsif { 


$ input str 


eq 


It 1. 1 r. 11 \ 


{ &print_hash ( 


"ls_value" ) ; } 






elsif ( 


$ input str 


eq 


"sa" ) 


\ &print taoie i 


" status_act ions " 


); } 




elsif ( 


$ input str 


eq 


"hsa" ) 


{ &print_hash ( 


"sa_value" ) ; } 






elsif ( 


$inputstr 


eq 


"ot" ) 


{ &print_table ( 


"of f icial_title" 


); } 




elsif ( 


$inputstr 


eq 


"hot" ) 


{ &tprint_hash ( 


"ot_value" ) ; } 






elsif ( 


$inputstr 


eq 


H s it ) 


\ &print tar>ie { 


"sponsor" ) ; } 






elsif ( 


$ input str 


eq 


•I Vi es II \ 

ns / 


{ Stprint hash ( 


"s_value" ) ; } 






elsif ( 


$ input str 


eq 


sop ) 


{ &print table ( 


" stat erne nt_of _ pu rpose " 


. \ 


elsif ( 


$inputstr 


eq 


"hsop" ) 


{ &print_hash ( 


"sop_value" ) ; } 






elsif ( 


$inputstr 


eq 




{ &pr int__table ( 


" submit ted_by" ) ; 


} 




elsif { 


$ input str 


eq 


nso } 


{ &pr int__hash ( 


"sb_value" ) ; } 






elsif ( 


$inputstr 


eq 


"sf" ) 


{ &print_table ( 


"submitted_f or" ) 


; } 




elsif ( 


$inputstr 


eq 


"hsf" ) 


{ &print_hash ( 


"sf_value" ) ; } 






elsif ( 


$ input str 


eq 


"ti" ) 




"topic_index" ) ; 


} 




elsif ( 


$ input str 


eq 


"hti" ) 


{ &print_hash( 


"ti_value" ) ; } 






elsif ( 


$ input str 


eq 


"1" ) 


{ &print_table ( 


"f ilename_senator 


" ); } 




elsif ( 


$inputstr 


eq 


"2" ) 


{ &print_table ( 


"senator" ) ; } 






elsif ( 


$inputstr 


eq 


,, 3 „ j 


{ &print_table ( 


" f ilename_ period " 


); } 




elsif ( 


$ input str 


eq 


"4" ) 


{ Seprint_table ( 


"prepared_by" ) ,- 


} 




elsif ( 


$ input str 


eq 


"5" ) 


{ &print_table ( 


" occur rence_sect ion" ) ; 


> 


elsif { 


$ input str 


eq 


"6" ) 


{ &print_table ( 


"occurrence_committee" [ 


; } 


elsif ( 


$ input str 


eq 


"pp" ) 


{ &pretty_print_ 


tables ( "106" ) ; 


} 




else { 


print "\t\t**** WRONG 


OPTION ****\n" ; 


'} 






&print_prompt ; 















MYEND : 



close LOG; 
exit ; 
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print 


"\t\"d\" . 


digest 


\ 


"hd\ " . 


d_value\n M ; 


print 


"\t\" ls\" 


latest status 


\ 


"hls\" . 


ls_value\n*' ,- 


print 


"\t\"sa\" 




\ 


"hsa\" . 


sa_value\n" ; 


print 


"\t\"ot\" 


official title 


\ 


"hot\ n . 


ot_value\n" ; 


print 


"\t\"s\" . 




\ 


"hs\" . 


s_value\n" ; 


print 


"\t\ M sop\ 


" . statement_of ^purpose 


.... \ 


u hsop\ " 


. sop_value\n" ; 


print 


"\t\" sb\" 


submitted by 


...... \ 


"hsb\" . 


sb_value\n" ; 


print 


"\t\"sf \" 


submitted for 


\ 


"hsf \" - 


sf_value\n" ; 


print 


"\t\ ,, ti\" 




\ 


"hti\" . 


t i_value\n" ; 


print 


"\t\n"; 










print 


"\t\"l\" . 


f i 1 ename_sena t or 


-- V'2\ 


" . senator\n" ; 


print 


"\t\"3\" . 


f i 1 ename_pe r i od 


-- \"4\ 


" . prepared_by\n" ; 


print 


"\t\"5\" . 


occurrence_sect ion 


\"6\ 


" . occur rence_commit tee 


print 


"\t\n"; 










print 


"\t\"pp\" 


. pretty print (Prolog) 


\n" ; 






print 


it ****************************** 


***\n" ; 







sub pretty_print_tables { 
my $sid = $_[0] ; 



my $DIR = " Prolog/sen_$senid" ; 
mkdir $DIR, 0755; 



open( PP, " >$DIR/boa.P" ) || die " * ERROR : can't open\n" ; 
&pretty_print { "boa" ) ; 
close PP; 

open( PP, ">$DIR/abstract.P" ) | | die " * ERROR : can't open\n" ; 
&pretty_print ( "abstract" ) ,- 
close PP; 

opent PP, " >$DIR/congressional_record. P" ) || die " * ERROR : can't open\ 
&pretty_j?rint ( "congressional_record" ) ; 
close PP; 



open( PP, " >$DIR/cosponsors . P" ) || 
&pretty_print ( "cosponsors" ); 
close PP; 

open( PP, " >$DIR/date_introduced. P" 
&pretty_print { "date_introduced" ) ; 
close PP; 

open( PP, ">$DIR/digest .P" ) || die 
&pretty_print ( "digest" ); 
close PP; 

open( PP, ">$DIR/latest_status .P" ) 
&pretty_jprint ( "latest_status" ); 
close PP; 



die f 1 * ERROR : can't open\n" ; 



) || die " * ERROR : can't open\n" ; 



♦ERROR : can ' t open\n" ; 



|| die " * ERROR : can't open\n" ; 



open( PP, ,! >$DIR/status_actions . P" ) | | die " * ERROR : can't open\n" ,- 
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} 



open( PP, " >$DIR/statement_of_purpose . P" ) || die "* ERROR : can't open\n 
&pretty_print ( "statement_of_purpose" ); 
close PP; 

open( PP, M >$DIR/submitted_by.P" ) | | die " * ERROR : can't open\n" ; 
&pretty__print ( " submit ted_by M ); 
close PP; 

open( PP, ">$DIR/submitted_for .P" ) || die *'*ERROR: can't open\n" ; 
&pretty_print { "submitted_f or" ),- 
close PP; 

open{ PP, " >$DIR/topic_index .P" ) || die "* ERROR : can't open\n" ; 
&pretty_print ( " topic_index" ); 
close PP; 

open( PP, " >$DIR/f ilename_senator.P" ) || die " * ERROR : can't open\n" ; 
&pretty_print { " f ilename_senator " ); 
close PP; 

open( PP, " >$DIR/ f ilename_period.P" ) || die " * ERROR : can't open\n" ; 
&pretty_print ( 11 f ilename_period" ); 
close PP; 

open( PP, ">$DIR/ senator .P" ) || die ***ERROR: can't open\n" ; 
&pretty_print ( "senator" ); 
close PP; 

open( PP, ">$DIR/prepared_by .P" ) || die " * ERROR : can't open\n" ; 
&pretty__print { "prepared_by M ) ; 
close PP; 

open( PP, " >$DIR/occurrence_section. P" ) || die " * ERROR : can't open\n" 
&pretty_print { "occurrence_section" ); 
close PP; 

open{ PP, " >$DIR/ occur rence_commit tee. P" ) || die "* ERROR : can't open\n 
&pretty_print { " occurrence_commi t tee " ); 
close PP; 



sub pretty_print { 

my $arg_table = $_[0] ; 

# digest ( 'quoted* strings ' , , -.). 

my $buff = 

my @arr = " " ; 

my $inputstr - ""; 

no strict; 



FIGURE 36O 



PERSISTENT ARCHIVES 
Inventors: Reagan W. Moore, et al. 
Howrey Docket No. 02737.0004.NPUS01 
88/118 



else { 

print PP "\ * $aref-> [$j] \ • , " ; 

} 

} 

$aref->[$n] =- s/_/ /g ; 

if( $arg_table eq "latest_status " || $arg_table eq "status_actions" ) 

{ 

$aref->[$n] =~ s/ CCCRRR /\n/g; 

my ©list = split (/\n/, $aref - > [$n] ) ; 

my $len = $#list; 

my $newstr = "[" ; 

foreach my $i (0 . . $len ) { 

my ($ls_date / $ls_mesg) - split ( /&tab; / , $list[$i]); 

$newstr . = "d(\'$ls_date\ 1 , "; 

$newstr .= " \ ' $ls_mesg\ ' ) " ; 

if ( $len > 0 && $i < $len ) { 
$newstr .= " , "; 

} 

} 

$newstr . = " ] " ; 
$aref->[$n] = $newstr; 
print PP "$aref->[$n] " ; 
print PP ") \. \n" ; 

} 

elsif( $arg_table eq "cosponsors" ) { 
#Dodd; Bryan; Leahy; Edwards; Hollings; Breaux (A-02/08/2000 ) : 

if ( $aref->[$n] =- m/ . + : / ) { 
chop $aref-> [$n] ; 

} 

my ©colist - split (/;/, $aref - > [$n] ) ; 
my $newstr - " [" ; 
foreach my $item {©colist ) { 
my $ items = ""; 

$item =- m/\s*(.+)/ && ($items = $1) ; 
my $co_name = " " ; 
my $co_amend = " " ; 
# Mikulski (A-ll/08/1999) 

if ( $items m/(.+) \((.+)\)/ ) { 

$co__name = $1; 

$co — amend = $2; 

$newstr . = " d ( \ ' $co_name\ ' , \ ' $co_amend\ ' ) , " ; 

} 

.else { 

my $cosponsor_val = $ items; 
$newstr . = " \ * $cosponsor_val\ ' , " ; 

} 

} 

chop $newstr; 
chop $newstr; 
$newstr . ^ " 3 " ; 
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sub print_bills { 
my $bill; 
my $ field ; 

foreach $bill ( sort keys %HoH ) { 
print "<bill name=\ " $bill\ " >\n" ; 
my $flag_NONE = "false"; 

foreach $field ( sort keys %{ $HoH{$bill} } ) { 
if ( $field eq " DAT E_ INTRODUCED" ) { 
print " 

<date_introduced>$HoH{$bill } { $f ield} </date_introduced>\n" ; 
} 

# 

elsif( $field eq "SPONSOR " ) { 

my $sponsor_val ~ $HoH{ $bill } { $f ield} ; 
my $val = " " ; 
my $print_f ield; 

$sponsor_val =- m/* {\d+) / && ($val - $1) ; 
if ( $val ne ) { # then it 1 s a number 
my $key = $senNUMHash{ $val } ; 

$print_field = $keypeopleHash{ $key }{ nameURI }; # use of 
uninitialized value! ! ! 

print " <sponsor>$print_f ield</sponsor>\n" ; 

} 

else { 

$sponsor_val =- s/ //g; 
$sponsor_val = lc $sponsor_val ; 

if ( exists ( $keypeopleHash{ $sponsor_val } ) ) { 

$print_f ield = $keypeopleHash{ $sponsor_val } { nameURI 

}; 

print " <sponsor>$print_f ield</sponsor>\n" ; 

} 

else { 

print LOG "!!! In print_bills: in SPONSOR section: 
keypeopleHash{ $sponsor_val } DOES NOT EXIST ! \n" ; ^ 

print " <sponsor>$HoH{$bill} {$f ield} </sponsor>\n" ; 

} 

} 

} 

# 

elsif ( $field eq "COSPONSORS" ) { 

my $cosponsors = $HoH{$bill} {$f ield} ; 
if ( $cosponsors -~ m/ . + : / ) { 

chop $cosponsors; 
} #=== COMMENT if you want COSPONSOR^NONE to disappear 
print " <cosponsors>\n" ; 
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if ( $items — m/(.+) \((.+)\)/ ) { 
$co_name - $1 ; 
$co_amend = $2 ; 
my $cosponsor_val = $co_name; 
my $val = " " ; 
my $print_f ield; 
$cosponsor_val =- s/ //g; 
$cosponsor_val = lc $cosponsor_val ; 
if ( exists ( $keypeopleHash{ $cosponsor_val } ) ) 

{ 

$print_f ield - $keypeopleHash{ $cosponsor_val 

} { nameURI } ; 

print " <co_name a- 

date=\" $co_amend\ " >$print_f ield</co_name>\n f ' ; 

} 

else { 

print LOG " ! ! ! In print_bills: in COSPONSORS 
section: keypeopleHash{ $cosponsor_val } DOES NOT EXIST! \n"; 

print " <co_name a- 

date=\"$co_amend\ " >$co_name</co_name>\n" ; 

} 

} 

else { 

my $cosponsor_val = $items; 

my $val - " " ; 

my $print_f ield; 

$cosponsor_val =- s/ //g; 

$cosponsor_val = lc $cosponsor_val ; 

if ( exists ( $keypeopleHash{ $cosponsor_val } ) ) 

{ 

$print_field = $keypeopleHash{ $cosponsor_val 

} { nameURI } ; 

print M 

<co_name > $pr int_f ie ld< / co_name > \n " 

} 

else { 

print LOG "!!! In printjbills: in COSPONSORS 
section: keypeopleHash{ $cosponsor_val } DOES NOT EXIST !\n"; 

print " 

<co_name>$items</co_name>\n" 

} 

) 

print " </cosponsor>\n" ; 

} 

print " </cosponsors>\n" ; 
# " ) ===== UNCOMMENT if you want COSPONSOR=NONE to disappear 

} 

elsif ( $field eq "OFFICIAL_TITLE ,! ) { 
print " 

<of f icial_title>$HoH{$bill} {$f ield}</of f icial_title>\n" ; 
} 

elsif ( $field eq " LATEST_STATUS " ) { 
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elsif ( $field eq "ABSTRACT" ) { 

print " <abstract>$HoH{$bill} {$f ield} </abstract >\n" ; " 

} 

elsif ( $field eq "COMMITTEE" ) { 

my $len = $#{ $HoH{ $bi 11 } {COMMITTEE} } ; 
print " <committees>\n" ; 
foreach my $i ( 0 . . $len ) { 
print " 

<committee>$HoH{$bill } {$f ield} [$i] </committee>\n" ; 
} 

print " </committees>\n" ; 

} 

elsif ( $field eq " S UBMI TTED_FOR " ) { 
print " 

<submitted_for>$HoH{$bill} { $f ield}</submitted_f or>\n" ; 
} 

elsif ( $field eq "CONGRESS I ONAL_RECORD" ) { 
print " 

<congressional_record>$HoH{$bill } { $f ield} </congressional_record>\n" ; 
} 

# 

elsif ( $field eq "SUBMITTED_BY" ) { 

my $submitted_by_val = $HoH{ $bill } { $f ield} ; 
my $val = " " ; 
my $print_f ield; 

$submitted_by_val =- s/ //g; 
$ submi 1 1 e d_by_va 1 = 1 c $ submi 1 1 ed_by_ va 1 ; 
if ( exists ( $keypeopleHash{ $ submi tted_by_val } ) ) 
$print_field = $keypeopleHash{ $ submi tted_by_val 

nameURI } ; 

print " 

< submi tted_by>$print_f ield</ submit ted_by>\n" ; 

} 

else { 

print LOG "!!! In print_bills: in SUBMI TTEED_BY 
section: keypeopleHash{ $ submi tted_by — val } DOES NOT EXIST !\n"; 

print " 

<submitted_by>$HoH{$bill} {$f ield} </submitted_by>\n" ; 
} 

} 

elsif ( $field eq "STATEMENT_OF_PURPOSE" ) { 
print " 

<statement_of_ purpose > $HoH { $bi 11 } { $f ield} </statement_of_purpose>\n" ; 

} • 

elsif ( $field eq "DIGEST" ) { 

print " <digest>$HoH{$bill} {$f ield} </digest>\n" ; 

} 

else { 

print LOG "!!! WRONG TAG: $f ield !!! in sub: print_bills\: 

} 

} 
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no strict; 

my $len = $# { $arg_table } + 1; 
print "$arg_table: COUNT=$len\n'» ; 
for my $i ( 0 . . $# { $arg_table } ) { 

if ( ! ($i % $MORE_SIZE ) && = = 0) ) { 

my $percent = ( ($i+l) / $len ) * 100; 
print "--More--"; 
printf{ " (%ld) n , $percent ); 
print "\% q: to quit\n" ; 
read{STDIN, $buff, 1) ; 

for { my $i = 0; ; read(STDIN, $buff, 1) ) { 
if ( $i = = 0 ) { @arr= n " ; } 
$arr[$i++] = $buff; 
if ( $buff eq »\n" ) { 

$inputstr = join * • , @arr; 

chop $ input str; 

goto NEXT; 

} 

$buff = " " ; 

} 

} 

NEXT : 

if ( $inputstr eq "q" ) { goto END_PRINT_TABLE ; } 

my $aref = $ { $arg_table } [$i] ; 
my $n = @$aref - 1; 
printf( M \t%5d:" , $i ); 
for my $j (0 - . $n ) { 

print "\t$aref -> " ; 

} 

print "\n n ; 

} 

END_PRINT_TABLE : ; 

} 

sub print_hash { 

my $arg__hash = $_[0] ; 

no strict; 

my $buff = ""; 

my @arr = " " " ; 

my $inputstr = ""; 

my $len = scalar keys % { $arg_hash} ; 
print "$arg_hash Hash: COUNT=$len\n ,! ; 
my $ i = 0 ; 

foreach my $key( sort keys %{$arg_hash} ) { 

if ( ! ($i % $MORE_SIZE ) && !($i 0) ) { 
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for C my $i = 0; ; read(STDIN, $buff, 1) ) { 
if ( $i == 0 J { ®arr=" n ; ) 
$arr[$i+ + ] = $buff; 
if ( $buff eq "\n" ) { 

$ input st r = join ■ ' , @arr; 

chop $ input str; 

goto NEXT 2 ; 

} 

$buff = »"; 

} 



NEXT2 : 



if ( $inputstr eq "q" ) { goto END_PRINT_HASH; } 
$i + +; 

print "\t*$key*\n"; 

} 

END PRINT HASH: ; 



#<section> 

#<p fontname= "Courier New" f ontsize=" 20 " ></p> 

#<p align- "center" f on t name =" Courier New" fontsize- "28 " bold="on" >LTNITED 
STATES SENATE</p> 
#<p align- "center" 
#<p align- "center" 
#<p align-"center" 



f ontsize= 
f ontsize= 



fontname= "Courier New 
f on t name = "Courier New 
fontname=" Courier New" fontsize 
#<p align="center" fontname=" Courier New" fontsize 
#<p align="left" fontname=" Courier New" fontsize="4 

bold-" on" > </p 

#<p aligns" left M fontname=" Courier New" fontsize="4 
#<p align- "left" f on t name =" Courier New" fontsize="4 
ACTIVITIES</p> 

#<p align="left" f on tname- 11 Courier New" font size- "4 

bold- " on " > < / p > 

#<p align="lef t" f on tname =" Courier New" fontsize="4 
#<p align-" left" f on tname -"Courier New" fontsize=" 3 
HONORABLE*: /p> 

#<p align="left" f on tname -"Courier New" fontsize="3 
SARBANES</p> 

#<p align-" lef t" fontname-" Courier New" fontsize- "3 



"28" bold= ,, on" ></p> 
"28" bold="on"></p> 
"28" bold-"on"x/p> 
"28" bold="on" ></p> 
8" 

8" bold= ,, on"x/p> 

8" bold- "on" LEGISLATIVE 



MARYLAND</p> 

#<p align= M left" font name- "Courier New" fontsize-" 3 
#<p align=" lef t" f ont name =" Courier New" fontsize="2 
PERIOD</p> 

#<p align-" lef t" f ont name* 11 Courier New 
1999 TO MARCH 31, 2000</p> 

#<p align-"left" fontname= "Courier New" fontsize="2 
#<p aligns" lef t" fontname=" Courier New" fontsize="4 

_</p> 

* " " "on 



bold= "on"> 



8" 

8" bold-"on"x/p> 
6" bold="on">THE 

6" bold="on">PAUL S. 

6" bold="on">0F 

6" bold="on"x/p> 
8" bold-"on">FOR THE 

fontsize="28" bold- " on "> JANUARY 06, 

8" bold-"on"x/p> 
8" 



#<p fontname= "Courier New" f ontsize="20 " bold= 



:/p> 
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#<p f on tname= "Courier New" f ontsize=" 20 " bold-"on" ></p> 
#<p fontname=" Courier New" f ontsize-" 20 " bold="on" ></p> 
#<p fontname=" Courier New" f ontsize=" 20 " bold= "on" ></p> 
#<p fontname=" Courier New" f ontsize="20 " bold= "on" ></p> 
#<p fontname= "Courier New" f ontsize="20" bold- "on" ></p> 
#<p align="left" foritname= "Courier New 
by : </p> 

#<p align="left" fontname^ "Courier New 
Computer Center</p> 

#<p align="left" fontname= "Courier New 
Sergeant at Arms</p> 

#<p align="left" fontname= n Courier New" f ontsize-"20" bold- "on" >and</p> 

#<p align="left" fontname=" Courier New" f ontsize= n 20" bold= "on" >Committee on 

Rules and Administration</p> 

#</section> 



f ontsize="20" bold="on" >Prepared 
f ontsize="20" bold="on" >Senate 
f ontsize="20" bold= "on" >Of f ice of the 



sub process_header { 

while ( $line - <SEN106> ) { 
$ 1 i ne_numbe r + + ; 

if ( $line =- m/<p align= .+>(.+) <\/p>/ ) { 
$h4 = $1; 

if ( $line =- m/THE HONORABLE/ ) { 



$line = <SEN106>; 
$ 1 ine_number+ + ; 

$line =- m/<p align= .+>(.+) <\/p>/ ; 
$senNAME= $1; 

my $first_line - $line_number ; 

$line = <SEN106>; 
$line_number++ ; 
$line =- m/OF (.+)<\/p>/; 
$state = $1; 

my ©senlist - splitt/ /, $senNAME) ; 
my $f irstname ~ $senlist [0] ; 
my $lastname = $senlist [$#senlist] ; 
my $middlepart = "" ; 

foreach my $i (1 ( $#senlist-l ) ) { 
$middlepart . = " $senlist [$i] " ; 

} 

chop $middlepart ,- 



$len = $#senator; 

$senator[ $len + 1 ] = [ ($senid, $first_line, $f irstname, 
$middlepart, $lastname, $state) ] ; 



$line = <SEN106>; 
$1 ine_number++ ; 
$line - <SEN106>; 
$line__number++ ; 
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$line = <SEN106>; 
$line_number++ ; 

$line = <SEN106>; 
$ 1 ine_number++ ; 

while ( $line = <SEN106> ) { 
$line_number++ ; 

if ( $line =- m/<p align=/ ) { 
goto BEGIN_Prepared_by ; 

} 

} 

BEGIN_Prepared_by : ; 

my $first_line2 = $line_number ; 

$line m/> ( . +) <\/p>/ ; 

my $cumulat ive_content = "$1 "; 

$line = <SEN106>; 
$line_number++ ; 
$line =- m/> ( . +) <\/p>/ ; 
$cumulat ive_content .- "$1 " ; 

$line = <SEN106>; 
$line_number++ ; 
$line =- m/> ( . +) <\/p>/ ; 
$cumulative_content .= "$1 "; 

$line - <SEN106>; 
$line_number++ ; 
$line =- m/> ( . +) <\/p>/; 
$cumulative_content .- "$1 

$line = <SEN106>; 
$line_number++ ; 
$line -- m/> ( . +) <\/p>/ ; 
$cumulat ive_content .- n $1 " ; 

my $ov = $senid . "_" - " $line_number" ; 

$len = $ttprepared_by ; 

$prepared_by [ $len + 1 ] = [ (Ssenid, $first_line2 
$cumulative_content) ] ; 

} 

$h4 = $1; 

} 

elsif { -$line =- ra/<\/section>/ ) { 
goto PHI; 

} 

} 

PHI : ; 

if ( $filename eq ,f D_l_LARI_S272_106 .xml " ) { 
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while ( $line = <SEN106> ) { 
$line_number++ ; 

if ( $line m/<\/section>/ ) { 
goto END_process_header ; 

} 

} 

END_process_header : ; 



# <p><string italic="off" hidden="on">SECTION VII. SUBJECT INDEX</string>ACADEMIC PERFORMANCE&tab; S.7, S.514, S.564</p> 

# <p>ACCESS TO HEALTH CARE&tab; S.6, S.1678, S.1690</p> 

# process_index_old
#
# Earlier version of the subject-index scanner. It walks the subject index
# line by line but does not store anything: the captured subject and bill
# lists are only parsed, never recorded.
#
# Reads and updates the file-level variables $line and $line_number and
# consumes lines from the SEN106 filehandle. Takes no arguments and has no
# meaningful return value. Jumps to the THEEND label (defined outside this
# sub) on the first line that is neither an index entry nor an empty <p></p>.
sub process_index_old {

    # The current line carries the section heading and the first entry.
    $line =~ m/<p>.+<\/string>(.+)&tab;(.+)<\/p>/;
    my $subject  = $1;
    my $bill_seq = $2;

    # Sample input lines:
    #<p align="right">Administrative procedure--Department of Health and Human Services&tab; S.331, S.1327</p>
    #<p>AGED&tab; S.10, S.51, S.331, S.391, S.472, S.718, S.784, S.792,</p>
    #<p align="right"> S.1023, S.1074, S.1142, S.1327, S.1499, S.1678, S.1760</p>

  N3: while ( $line = <SEN106> ) {
        $line_number++;

        if ( $line =~ m/<p.*>(.+)&tab;(.+\d)(.*)<\/p>/ ) {
            my $subject  = $1;
            my $bill_seq = $2;
            my $comma    = $3;    # "," when the bill list continues on the next line

            # IS THERE A CONTINUATION...
            while ( $comma eq "," ) {

                my $buf = <SEN106>;

                # End of input in the middle of a continued entry: stop
                # scanning instead of re-testing stale captures forever.
                last N3 unless defined $buf;

                $line_number++;

                $buf =~ m/<p.*>(.+\d)(.*)<\/p>/;
                $bill_seq = $1;
                $comma    = $2;    # "" ends the entry; the outer loop then reads the next line
            }

        }

        #<p></p>
        elsif ( $line =~ m/<p><\/p>/ ) {
            # empty paragraph: ignore
        }

        else {

            goto THEEND;

        }

    }

}

# <p><string italic="off" hidden="on">SECTION VII. SUBJECT INDEX</string>ACADEMIC PERFORMANCE&tab; S.7, S.514, S.564</p>
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#<p align= " lef t " f ont name = "Courier New" f ontsize= "20 " bold=: "on" >Committee on 

Rules and Administrat ion</p> 

#</section> 

#<sectiori> 

#<header> 

#<p align= "center" fontname= "Courier New" f ontsize= " 20 " 

bold="on"><f ieldxf ldinst> PAGE </fldinstxf ldrslt xstring charstyname= " " 

# fontname= "Courier New" f ontsize= " 20 " 
bold= "on" >2</string></f ldrslt ></field></p> 

#<p align= " right " fontname= "Courier New" f ontsize= "20 " bold="on"> 

# <string fontname= "Courier New" f ontsize= "20 " bold= "on" >Lincoln D. 
Chaf ee</stringx/p> 

#<p align= ,! lef t " fontname= "Courier New" f ontsize= " 20 " bold= "on" xstring 
fontname=" Courier New" f ontsize= "20 " bold="on"> 

# SUBJECT INDEX TO SPONSORED AND COSPONSORED MEASURES AND 
AMENDMENTS </stringx/p> 

#<p align=" lef t " f ont name= "Courier New" f ontsize= " 20 " bold= "of f " ></p> 
#</header> 

#<p>ACCESS TO HEALTH CARE&tab; S.494</p>
#<p>ACCIDENT PREVENTION&tab; S.149, S.936</p>

sub process_index { 



$line =- m/<p> . +<\/string> ( . +) &tab; (.+)<\/p>/; 
my $ subject = $1; 
my $bill_seq = $2; 

my @bill_list = split ( /,/, $bill_seq ); 

my $sub = $subject; 
$sub =- s/ /_/g; 
$ti_value{ $sub } = 

foreach my $item (@bill_list) { 
$item =- s/\s*//g ; 

my $len = $#topic_index; 

$topic_index [ $len +1 ] = [ ($senid, $line — number , $sub, 



#<p al ign= 11 right " >Administ rat ive procedure- -Department of Health and Human 
Services&tab; S.331, S.1327</p> 

#<p>AGED&tab; S.10, S.51, S.331, S.391, S.472, S.718, S.784, S.792,</p> 

#<p aligns" right "> S.1023, S.1074, S.1142, S.1327, S.1499, S.1678, S.1760</p> 



$ 1 ine_number + + ; 

if ( $line m/<p .*>(.+) &tab; (. +\d) (.*) <\/p>/ ) { 
my $ subject = $1; 
my $bill__seq = $2; 
my $ comma = $3 ; 

my @bill_list = split ( /,/, $bill_seq ); 



$item) ] ; 



N3:while( $line = <SEN106> ) { 
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# IS THERE A CONTINUATION.,, 
while ( $comma eq " , " ) { 

my $buf = <SEN106>; 

$line_number++ ; 

$buf=- m/<p.*>(.+\d) ( -*)<\/p>/; 
my $bill_seq = $1; 
my $ comma - $2; 

my @bill_list = split { /,/, $bill_seq ); 

foreach my $item (@bill_list) { 
$item = - s/\s*//g / 
my $len = $#topic_index; 
$ t op ic_ index [ $len +1 ] = [ ($senid, 
$line_number , $subject / $item) ]; 

} 

if ( $comma eq " " ) { 
goto N3 ; 

} 

} 

} 

#<px/p> 

elsif ( $line m/<p><\/p>/ ) { 

} 

else { 

goto THEEND; 

} 

} 

} 
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<!-- Logical structure of the SLA (Senate) collection, one declaration per element.
     The "name = (content model)" notation is kept as it appears in the figure. -->
<!ELEMENT SLA_collection = (senate_file*)>
<!ELEMENT senate_file = (filename, header_page?, section*, subject_index?)>
<!ELEMENT header_page = (senator?, report_period?, prepared_by?)>
<!ELEMENT senator = (first_name?, middle_part?, last_name?, state?)>
<!ELEMENT report_date = (start_date?, end_date?)>
<!ELEMENT section = (sec_number, sec_name, bar*)>
<!ELEMENT bar = (bill | resolution | amendment)>
<!ELEMENT resolution = (joint_resolution, concurrent_resolution, simple_resolution)>
<!ELEMENT bill = (bar_id, date_introduced, sponsor?, cosponsors?, official_title,
                  (latest_status | status_actions), abstract, committee?)>
<!ELEMENT amendment = (bar_id, date_introduced, submitted_for, congressional_record,
                       sponsor?, submitted_by?, cosponsors,
                       statement_of_purpose, (latest_status | status_actions),
                       abstract)>
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# ! /usr/local/bin/perl -w 
use strict;

# caccf_xml FILE REC_SIZE
#
# Start of the script that converts the fixed-width file named by $ARGV[0]
# into "$ARGV[0].xml", with a log in "$ARGV[0].xml.log". This part checks the
# record-size argument, opens the three filehandles (IN, OUT, LOG) and writes
# the opening <caccf_records> tag.
my $REC_SIZE = 164;

die "*** Record size must be 164 !\n" if ( $ARGV[1] != 164 );

my $rec_count = 0;    # number of records read so far
my $rec;              # buffer for the current record
my $fsize;            # size of the input file in bytes

open(IN, "< $ARGV[0]") || die "can't read from: $!";
$fsize = -s IN;

# A size that is not a whole number of records is reported but not fatal.
print "*** WARNING: file size ($fsize) is no multiple of record size ($REC_SIZE)\n"
    if ( $fsize % $REC_SIZE != 0 );

open(OUT, "> $ARGV[0].xml") || die "can't write to: $!";
open(LOG, "> $ARGV[0].xml.log") || die "can't write to: $!";

print OUT "<caccf_records>\n";

while (read (IN, $rec, $REC_SIZE) == $REC_SIZE) { 
print OUT "<rec no=\" " , ++ $rec_count, "\ n "; 
print OUT "ms=\ » " , substr ( $rec, 0 , 1) , " \ " 
print OUT "cc = \ "", substr ( $rec , 1 , 2 ) , " \ " "; 
print OUT " tc=\ "" , substr ( $rec , 3 , 2 ) , " \ « 
print OUT "rn=\ " " , substr ( $rec , 5 , 5) , " \ M " ; 

print LOG "*** ERROR, >" , ( substr ( $rec , 10 , 1 )) , " < instead of blank in #11, 
rec $rec_count : \n [ [$rec] ] \n\n" 



if (substr ($rec, 10, 1) ne " « ) ; 



print 


OUT 


"na= 


\ 


" " , substr ($rec, 


11, 28) 




print 


OUT 


" dp= 


\ 


" " , substr ( $rec ,39,4) , 




print 


OUT 


" sn= 


\ 


" " , substr ($rec, 43 , 9) , 


••\" 


print 


OUT 


" mg= 


\ 


" " , substr ($rec, 


52,4) , 


"\ n 


print 


OUT 


"P9= 


\ 


11 " , substr ($rec, 


56 , 2) , 


"\" 


print 


OUT 


"dd= 


\ 


" " , substr ($rec, 


58, 8) , 


it ^ it 


print 


OUT 


"hc = 


\ 


" " , substr ($rec, 


66, 20) 




print 


OUT 


"hs= 


\ 


" " , substr ($rec, 


86,2) , 


it ^ it 


print 


OUT 


"oc= 


\ 


" " , substr ($rec, 


88, 5) , 


» ^ it 


print 


OUT 


"db= 


\ 


" " , substr ($rec, 


93,8) , 


it ^ it 


print 


OUT 


M rc= 


\ 


" " , substr ($rec, 


101, 1) 




print 


OUT 


"ai = 


\ 


" " , substr ($rec, 


102, 1) 




print 


OUT 


" ra= 


\ 


" " , substr ($rec, 


103, 1) 


, "V 


print 


OUT 


"re = 


\ 


" " , substr ($rec, 


104 , 2) 


, " \ " 


print 


OUT 


"le = 


\ 


" " , substr ($rec, 


106, 2) 


It ^ II 


print 


OUT 


"ma= 


\ 


" " , substr ($rec, 


108, 1) 


, " \ " 


print 


OUT 


"se = 


\ 


" " , substr ($rec, 


109, 1) 


, " \ " 
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# ! /usr/local/bin/perl -w 
use strict;

# caccf2oracle FILE REC_SIZE
my $REC_SIZE = 164;

#die "*** Record size must be 164 !\n" if ( $ARGV[1] != 164 )

my $rec_count = 0;    # number of records read so far
my $rec;              # buffer for the current record
my $fsize;            # size of the input file in bytes

# Two-digit month number => three-letter month abbreviation, used by
# mm_dd_yy() and yymmdd() when building TO_DATE(...) literals.
my %month = (
    "01" => "JAN",
    "02" => "FEB",
    "03" => "MAR",
    "04" => "APR",
    "05" => "MAY",
    "06" => "JUN",
    "07" => "JUL",
    "08" => "AUG",
    "09" => "SEP",
    "10" => "OCT",
    "11" => "NOV",
    "12" => "DEC",
);



# mm_dd_yy($a_date)
#
# Converts a date field of the form "mm/dd/yy" into the text of a TO_DATE
# call, "TO_DATE('dd-MON-19yy')", using the file-level %month table for the
# month abbreviation. The century is fixed at 19.
#
# Returns the string "NULL" and prints an error (tagged with the file-level
# $rec_count) when the field does not look like mm/dd/yy or when the month
# number is not a key of %month, so that a literal with an empty month is
# never emitted.
sub mm_dd_yy {

    my ($a_date) = @_;

    if (   $a_date =~ m|([0-9]{2})/([0-9]{2})/([0-9]{2})|
        && exists $month{$1} )
    {
        return "TO_DATE('$2-$month{$1}-19$3')";
    }
    else {
        print "*** ERROR Rec #$rec_count, mm_dd_yy, not a date >>>$a_date<<<\n";
        return "NULL";
    }

}

# yymmdd($a_date)
#
# Converts a six-digit "yymmdd" date field into the text of a TO_DATE call,
# "TO_DATE('dd-MON-19yy')", using the file-level %month table for the month
# abbreviation. The century is fixed at 19.
#
# Returns the string "NULL" and prints a warning (tagged with the file-level
# $rec_count) when the field is not six digits or when the month number is
# not a key of %month. The result is returned explicitly, matching mm_dd_yy().
sub yymmdd {

    my ($a_date) = @_;

    if (   $a_date =~ m|([0-9]{2})([0-9]{2})([0-9]{2})|
        && exists $month{$2} )
    {
        return "TO_DATE('$3-$month{$2}-19$1')";
    }
    else {
        print "*** WARNING Rec #$rec_count, yymmdd, not a date >>>$a_date<<<\n";
        return "NULL";
    }

}

# escapeQuote($a_string)
#
# Doubles every single quote in $a_string so the value can be placed inside
# a single-quoted SQL string literal. When at least one quote was replaced,
# prints a note (tagged with the file-level $rec_count) showing the escaped
# value. Returns the (possibly modified) copy of the string.
sub escapeQuote {

    my ($a_string) = @_;

    # s///g returns the number of replacements, so the note is printed only
    # when something actually changed.
    if ( $a_string =~ s/\'/\'\'/g ) {
        print "*** NOTE Rec #$rec_count, quote escaped >>>$a_string<<<\n";
    }

    return $a_string;

}
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open (OUT, "> $ARGV[0] .sql M ) || die "can't write to: $ ! " ; 
open ( LOG," > $ ARGV [ 0 ] . sql . log " ) || die "can't write to: $!"; 



while (read 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

print OUT 

}; 



(IN, $rec, $REC_SIZE) == $REC_SIZE) { 
"insert into CACCF values ("; 
" 1 " , + + $rec_count # internal 

" 1 ", substr ($rec, 0,1) , " • , " ; # ms 
" ' " , substr <$rec, 1, 2) , " ' , " ; # cc 
substr ($rec, 3,2) "; # tc 
"•", substr ($rec, 5,5) ,"', "; # rn 
" • " , escapeQuote (substr ($rec, 11, 28) ) , " f , 

# dp 
# sn 



record number 



# na 



" 1 " ,substr($rec,39,4) , " 
" ' " , substr ($rec, 43 , 9) , " 
" • ", substr ($rec, 52,4) ,"', " ; # mg 

, substr ($rec, 56,2) "; # pg 

mm_dd_yy (substr ($rec, 58, 8) ) , " , " ; # dd 
" 1 " , escapeQuote (substr ($rec, 66, 20) ) , " ' , 11 ; # he 
" • " , substr ($rec, 86, 2) , " 1 
" ' substr ($rec, 88, 5) , " 1 
mm_dd_yy (substr ($rec, 93, 8) ) , " , 11 ; # db 
" ' " , substr ($rec, 101, 1) , " 1 
" » M , substr ($rec, 102, 1) , " 
" ' " , substr ($rec, 103, 1) , " 1 
" 1 " , substr ($rec, 104, 2) , » ' 
" ' " , substr ($rec, 106, 2) , " ' 
" • " , substr ($rec, 108, 1) , 1 
" ' " , substr ($rec, 109, 1) , " ' 
" • " , substr ($rec, 110, 1) , " ' 
" ■ " , substr ($rec, 111, 1) , 
yymmdd (substr ( $rec , 112 , 6) 
" ' " , substr ($rec, 118, 1) , " ' 
" ' " , substr ($rec, 119, 3 ) , " ' 
" ' " , substr ($rec, 122, 2) , " ' 
" • " , substr ($rec, 124, 1) , " ' 
" 1 11 , escapeQuote (substr ($rec, 
" ' " , escapeQuote (substr {$rec, 154 , 2) ) , " ' , " ; # ty 
" ' " , escapeQuote ( substr ($rec, 156, 2) ) , " 1 , " ; # pc 
"'" , substr ($rec, 158, 2) , ; # mc 

substr ($rec, 160, 2) ,"'," ; # pr 
"'", substr ($rec, 162, 2) , " ' " ; # fl 
") ;\n"; 



i 


# hs 


i 


# oc 


) 


ii 


it . 








II 


• # 


rc 


M 


• # 


ai 


M 


• # 


ra 


tl 


# 


re 


tl 


# 


le 


II 




ma 


II 


# 


se 


II 


# 


ci 


II 


# 


PP 


t 


i ii 


; # 


It 


# 


lr 


It 




br 


It 


# 


ag 


It 


# 


sc 



dt 



125,29) ) , " ' , " ; # CO 



print LOG "[$ARGV[03: read " , $rec_count*$REC_SIZE , " bytes = $rec_count 
records x $REC_SIZE] \n" ; 

print LOG "*** WARNING : file size = $fsize\n" 
if ( $rec_count*$REC_SIZE != $fsize ) ,* 
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4000 TRANSFORMING REPRESENTATION 

OF DATA OBJECTS INTO SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF 
DATA OBJECTS 



4002 



VERIFYING TRANSFORMED DATA 
OBJECTS USING KNOWLEDGE 
RELEVANT TO COLLECTION 



ARCHIVING SELF-DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF DATA OBJECTS 
WITH SELF-DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF LOGICAL 

STRUCTURE OF COLLECTION AND SELF- 
DESCRIBING, INFRASTRUCTURE-INDEPENDENT 
REPRESENTATION OF KNOWLEDGE 
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RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF 
LOGICAL STRUCTURE OF COLLECTION 



RETRIEVING FROM THE ARCHIVE A 
SELF-DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF 
KNOWLEDGE RELEVANT TO THE 
COLLECTION 



CREATING QUERY-ABLE MECHANISM 
IN ACCORDANCE WITH LOGICAL 
STRUCTURE OF COLLECTION 



RETRIEVING FROM THE ARCHIVE A 
SELF-DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF 
DATA OBJECTS 



VERIFYING THAT THE DATA OBJECTS 
ARE CONSISTENT WITH THE 
KNOWLEDGE RELEVANT TO THE 
COLLECTION 



LOADING DATA OBJECTS INTO QUERY- 
ABLE MECHANISM 
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4112 



RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF 
PRESENTATION MECHANISM FOR ONE 
OR MORE DATA OBJECTS 



4114 



RETRIEVING ONE OR MORE DATA 
OBJECTS FROM QUERY-ABLE 
MECHANISM 



4116 



VERIFYING THAT THE ONE OR MORE 

DATA OBJECTS ARE CONSISTENT 
WITH KNOWLEDGE RELEVANT TO THE 
COLLECTION 



4118 



PRESENTING THE ONE OR MORE 
DATA OBJECTS USING THE 
PRESENTATION MECHANISM 
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4200 



RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF 
KNOWLEDGE RELEVANT TO THE 
COLLECTION 



4202 



USING THE KNOWLEDGE TO VALIDATE 
THE COLLECTION 
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4300. 



4310 



4308 



ACCESSION 




CLOSURE 




ATTRIBUTE 


TEMPLATE 




CONCEPT/ATTRIBUTE 




INVERSE INDEXING 



4304 



4306 



4302 



ATTRIBUTE 




ATTRIBUTE 




OCCURRENCE 


SELECTION 




TAGGING 




TAGGING 
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4400 



RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT, OR EXECUTABLE

REPRESENTATION OF 
TRANSFORMATION PROCEDURE 
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4402 



EXECUTING THE PROCEDURE TO 
TRANSFORM DATA RECORDS INTO A 
SELF-DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF 
DATA OBJECTS 



FIGURE 44A 



4404 



RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT, OR EXECUTABLE 

REPRESENTATION OF 
TRANSFORMATION PROCEDURE 



4406 



RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT REPRESENTATION OF 
DATA OBJECTS 



EXECUTING THE PROCEDURE TO 
TRANSFORM SELF-DESCRIBING, 
INFRASTRUCTURE-INDEPENDENT 

REPRESENTATION OF DATA OBJECTS 
INTO A FORM CAPABLE OF BEING 

INSTANTIATED ONTO A QUERY-ABLE 
MECHANISM 
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RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT, OR EXECUTABLE 

REPRESENTATION OF 
TRANSFORMATION PROCEDURE 



RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE-
INDEPENDENT REPRESENTATION OF 
DATA OBJECTS 



EXECUTING THE PROCEDURE TO 
TRANSFORM SELF-DESCRIBING, 
INFRASTRUCTURE-INDEPENDENT 
REPRESENTATION OF DATA OBJECTS 
INTO OCCURRENCES OF ATTRIBUTE 
OR ELEMENT VALUES 



FIGURE 44C 



RECEIVING DATA RECORDS TAGGED 
WITH ATTRIBUTE OR ELEMENT NAMES 



FORMING FROM THE TAGGED DATA 

RECORDS OCCURRENCES OF 
ATTRIBUTE OR ELEMENT VALUES 



FIGURE 45 
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AT LEAST ONE REPRESENTATION OF COLLECTION 



AT LEAST ONE SELF-DESCRIBING, INFRASTRUCTURE-
INDEPENDENT, OR EXECUTABLE SPECIFICATION OF 
ONE OR MORE TRANSFORMATIONS RELEVANT TO 

COLLECTION 



AT LEAST ONE SELF-DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT, OR EXECUTABLE SPECIFICATION OF 
ONE OR MORE RULES RELEVANT TO THE 
COLLECTION 



SELF-DESCRIBING, INFRASTRUCTURE-INDEPENDENT 
REPRESENTATION OF PRESENTATION MECHANISM 

(OPTIONAL) 



FIGURE 46 
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%%% Rules for <!ELEMENT X (Y, Z)>

false <- P:X, not (P.1):Y.                 % 1st child is not Y

false <- P:X, not (P.2):Z.                 % 2nd child is not Z

false <- P:X, not P[_->_].                 % there are no children

false <- P:X[N->_], not N=1, not N=2.      % there are other children

%%% Rules for <!ELEMENT X (Y | Z)>

false <- P:X[1->A], not A:Y, not A:Z.      % 1st child other than Y or Z

false <- P:X, not P[_->_].                 % there are no children

false <- P:X[N->_], not N=1.               % a non-1st child

%%% Rule for <!ELEMENT X (Y)*>

false <- P:X[_->C], not C:Y.               % a non-Y child
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5000 



RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT, OR EXECUTABLE 

SPECIFICATION OF ONE OR MORE 
TRANSFORMATIONS 



5002 



RETRIEVING FROM ARCHIVE ONE OR 
MORE DATA OBJECTS FROM THE 
COLLECTION 



J 

5004 EXECUTING THE SPECIFICATION TO 

AUTOMATICALLY PLACE THE ONE OR 
^> MORE DATA OBJECTS INTO A FORM 
SUITABLE FOR INSTANTIATION ONTO 
A QUERY-ABLE MECHANISM 



FIGURE 50 



5100 



RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT, OR EXECUTABLE 

SPECIFICATION OF ONE OR MORE 
RULES 



5102 



EXECUTING THE SPECIFICATION TO 
AUTOMATICALLY VALIDATE THE 
COLLECTION 



FIGURE 51A 
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« 



5104 



PRODUCING OCCURRENCES OF 
ATTRIBUTE OR ELEMENT VALUES 



5106 DETERMINING THAT THE

OCCURRENCES ARE CONSISTENT 
WITH THE RULES ENCODED BY THE 
SPECIFICATION AND ANY VALID 
EXCEPTIONS 



FIGURE 51B



5200 



RETRIEVING FROM ARCHIVE SELF- 
DESCRIBING, INFRASTRUCTURE- 
INDEPENDENT, OR EXECUTABLE 
SPECIFICATION OF ONE OR MORE
TRANSFORMATIONS 



5202 



RETRIEVING FROM ARCHIVE ONE OR 
MORE DATA OBJECTS FROM THE 
COLLECTION 



5204 



EXECUTING THE SPECIFICATION TO 
AUTOMATICALLY PLACE THE ONE OR 
MORE DATA OBJECTS INTO A FORM 
SUITABLE FOR PRESENTATION 



FIGURE 52 
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# An excerpt of an example of a Topic Map for the SLA (Senate) 

# Collection. 
# 

# 4 Topics are shown: t1 , t2, t3, and t4 of type "SubjectEntry" 

# These are actually Subject Index Entries found in the 

# raw data 

# 

# For each topic, there is an occurrence list of locator elements

# corresponding to the bills that discuss that topic. 

# 

# In addition, topics are related to each other through associations. 

# Here we created two types of associations: 

# <assoc types="CoDiscussedInExactlyOneBill">

# <assoc types="CoDiscussedInTwoOrMoreBills">

# 

# showing the "degree of connectedness" between two topics. 

# These would be value-added relationships, as they are implicit 

# in the raw data, and discovered by our topic map building 

# routines. 
# 

# Bertram Ludaescher & Richard Marciano -- March 20, 2001 




<!DOCTYPE topicmap [
<!-- A topic map is any mix of topics and associations. -->
<!ELEMENT topicmap (topic | assoc)* >
<!ELEMENT topic (topname | occurs)* >
<!ATTLIST topic
    id    ID    #REQUIRED
    types CDATA #IMPLIED
>

<!ELEMENT topname (basename, dispname, sortname) >
<!ELEMENT basename (#PCDATA) >
<!ELEMENT dispname (#PCDATA) >
<!ELEMENT sortname (#PCDATA) >

<!-- Occurrence list: one locator per place the topic occurs. -->
<!ELEMENT occurs (locator*) >
<!ELEMENT locator EMPTY >
<!ATTLIST locator
    role CDATA #REQUIRED
    href CDATA #REQUIRED
>

<!-- An association groups the topics named by its assocrl members. -->
<!ELEMENT assoc (assocrl*) >
<!ATTLIST assoc
    types CDATA #IMPLIED
>

<!ELEMENT assocrl EMPTY >
<!ATTLIST assocrl
    role CDATA #REQUIRED
    href CDATA #REQUIRED
>
]>



54A 



54B 



54C 
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<topicmap> 
<!-- Topics t1-t3: subject index entries, each with one locator per bill. -->
<topic id="t1" types="SubjectEntry">
  <topname>
    <basename>Apartment houses</basename>
    <dispname>Apt. Houses</dispname>
    <sortname>APARTMENTHOUSES</sortname>
  </topname>
  <occurs>
    <locator role="DiscussedIn" href="#S.463" />
  </occurs>
</topic>

<topic id="t2" types="SubjectEntry">
  <topname>
    <basename>Children</basename>
    <dispname>Child.</dispname>
    <sortname>CHILDREN</sortname>
  </topname>
  <occurs>
    <locator role="DiscussedIn" href="#S.300" />
    <locator role="DiscussedIn" href="#S.463" />
    <locator role="DiscussedIn" href="#S.1638" />
    <locator role="DiscussedIn" href="#S.1673" />
    <locator role="DiscussedIn" href="#S.1709" />
    <locator role="DiscussedIn" href="#S.Res.125" />
    <locator role="DiscussedIn" href="#S.Res.258" />
  </occurs>
</topic>

<topic id="t3" types="SubjectEntry">
  <topname>
    <basename>Welfare</basename>
    <dispname>Welf.</dispname>
    <sortname>WELFARE</sortname>
  </topname>
  <occurs>
    <locator role="DiscussedIn" href="#S.463" />
    <locator role="DiscussedIn" href="#S.1277" />
    <locator role="DiscussedIn" href="#S.1709" />
    <locator role="DiscussedIn" href="#S.Con.Res.28" />
    <locator role="DiscussedIn" href="#S.Res.125" />
    <locator role="DiscussedIn" href="#S.Res.260" />
  </occurs>
</topic>
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<topic id="t4" types="SubjectEntry">
  <topname>
    <basename>Youth employment</basename>
    <dispname>Youth empl.</dispname>
    <sortname>YOUTHEMPLOYMENT</sortname>
  </topname>
  <occurs>
    <locator role="DiscussedIn" href="#S.463" />
  </occurs>
</topic>

<!-- All four topics are linked by this association type. -->
<assoc types="CoDiscussedInExactlyOneBill">
  <assocrl role="DiscussedInSameBill" href="t1" />
  <assocrl role="DiscussedInSameBill" href="t2" />
  <assocrl role="DiscussedInSameBill" href="t3" />
  <assocrl role="DiscussedInSameBill" href="t4" />
</assoc>

<!-- Only t2 and t3 are linked by this association type. -->
<assoc types="CoDiscussedInTwoOrMoreBills">
  <assocrl role="DiscussedInSameBill" href="t2" />
  <assocrl role="DiscussedInSameBill" href="t3" />
</assoc>

</topicmap>



FIGURE 54C 



